diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-02-14 18:44:00 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-02-14 18:44:00 +0000 |
commit | 20f161ca116b8a4fc7ac986a317d7f6d43e5c173 (patch) | |
tree | e61bb7f98a2c80dd9264c5f3810c4765419e64b7 | |
download | libwww-perl-tarball-master.tar.gz |
libwww-perl-6.13HEADlibwww-perl-6.13master
70 files changed, 14740 insertions, 0 deletions
@@ -0,0 +1,120 @@ +Adam Newby <adam@NewsNow.co.uk> +Albert Dvornik <bert@genscan.com> +Alexandre Duret-Lutz <duret_g@lrde.epita.fr> +Andreas Gustafsson <gson@araneus.fi> +Andreas König <andreas.koenig@anima.de> +Andreas König <koenig@mind.de> +Andrew Pimlott <andrew@pimlott.net> +Andy Lester <andy@petdance.com> +Ben Coleman <bcoleman@mindspring.com> +Benjamin Low <ben@snrc.uow.edu.au> +Ben Low <ben@snrc.uow.edu.au> +Ben Tilly <Ben_Tilly@trepp.com> +Blair Zajac <blair@gps.caltech.edu> +Blair Zajac <blair@orcaware.com> +Bob Dalgleish +BooK <book@netcourrier.com> +Brad Hughes <brad@tmc.naecker.com> +Brian J. Murrell +Brian McCauley <B.A.McCauley@bham.ac.uk> +Charles C. Fu <ccwf@bacchus.com> +Charles Lane <lane@DUPHY4.Physics.Drexel.Edu> +Chris Nandor <pudge@pobox.com> +Christian Gilmore <cgilmore@tivoli.com> +Chris W. Unger <cunger@cas.org> +Craig Macdonald <craig@freeasphost.co.uk> +Dale Couch <dcouch@training.orl.lmco.com> +Dan Kubb <dan.kubb@onautopilot.com> +Dave Dunkin <dave_dunkin@hotmail.com> +Dave W. Smith <dws@postcognitive.com> +David Coppit <david@coppit.org> +David Dick <david_dick@iprimus.com.au> +David D. Kilzer <ddkilzer@madison.dseg.ti.com> +Doug MacEachern <dougm@covalent.net> +Doug MacEachern <dougm@osf.org> +Doug MacEachern <dougm@pobox.com> +Edward Avis <epa98@doc.ic.ac.uk> +<erik@mediator.uni-c.dk> +Gary Shea <shea@gtsdesign.com> +Gisle Aas <aas@oslonett.no> +Gisle Aas <aas@sn.no> +Gisle Aas <gisle@aas.no> +Gisle Aas <gisle@ActiveState.com> +Graham Barr +Gurusamy Sarathy <gsar@ActiveState.com> +Gurusamy Sarathy <gsar@engin.umich.edu> +Hans de Graaff <hans@degraaff.org> +Harald Joerg <haj@oook.m.uunet.de> +Harry Bochner <bochner@das.harvard.edu> +Hugo <hv@crypt.compulink.co.uk> +Ilya Zakharevich +INOUE Yoshinari <inoue@kusm.kyoto-u.ac.jp> +Ivan Panchenko +Jack Shirazi +James Tillman +Jan Dubois <jand@ActiveState.com> +Jared Rhine +Jim Stern <jstern@world.northgrum.com> +Joao Lopes <developer@st3tailor.com.br> +John Klar <j.klar@xpedite.com> +Johnny Lee <typo_pl@hotmail.com> +Josh Kronengold <mneme@mcny.com> +Josh Rai <josh@rai.name> +Joshua Chamas <joshua@chamas.com> +Joshua Hoblitt <jhoblitt@ifa.hawaii.edu> +Kartik Subbarao <subbarao@computer.org> +Keiichiro Nagano <knagano@sodan.org> +Ken Williams <ken@mathforum.org> +KONISHI Katsuhiro <konishi@din.or.jp> +Lee T Lindley <Lee.Lindley@viasystems.com> +Liam Quinn <liam@htmlhelp.com> +Marc Hedlund <hedlund@best.com> +Marc Langheinrich <marc@ccm.cl.nec.co.jp> +Mark D. Anderson <mda@discerning.com> +Marko Asplund <aspa@hip.fi> +Mark Stosberg <markstos@cpan.org> +Markus B Krüger <markusk@pvv.org> +Markus Laker <mlaker@contax.co.uk> +Martijn Koster <m.koster@nexor.co.uk> +Martin Thurn <mthurn@northropgrumman.com> +Matthew Eldridge <eldridge@Graphics.Stanford.EDU> +<Matthew.van.Eerde@hbinc.com> +Matt Sergeant <matt-news@sergeant.org> +Michael A. Chase <mchase@ix.netcom.com> +Michael Quaranta <quaranta@vnet.IBM.COM> +Michael Thompson <mickey@berkeley.innomedia.com> +Mike Schilli <schilli1@pacbell.net> +Moshe Kaminsky <kaminsky@math.huji.ac.il> +Nathan Torkington <gnat@frii.com> +Nicolai Langfeldt <janl@ifi.uio.no> +Nicolai Langfeldt <janl@math.uio.no> +Norton Allen <allen@huarp.harvard.edu> +Olly Betts <olly@muscat.co.uk> +Paul J. Schinder <schinder@leprss.gsfc.nasa.gov> +<peterm@zeta.org.au> +Philip GuentherDaniel Buenzli <buenzli@rzu.unizh.ch> +Pon Hwa Lin <koala@fragment.com> +Radoslaw Zielinski <radek@karnet.pl> +Radu Greab <radu@netsoft.ro> +Randal L. Schwartz <merlyn@stonehenge.com> +Richard Chen <richard@lexitech.com> +Robin Barker <Robin.Barker@npl.co.uk> +Roy Fielding <fielding@beach.w3.org> +Sander van Zoest <sander@covalent.net> +Sean M. Burke <sburke@cpan.org> +<shildreth@emsphone.com> +Slaven Rezic <slaven@rezic.de> +Steve A Fink <steve@fink.com> +Steve Hay <steve.hay@uk.radan.com> +Steven Butler <stevenb@kjross.com.au> +<Steve_Kilbane@cegelecproj.co.uk> +Takanori Ugai <ugai@jp.fujitsu.com> +Thomas Lotterer <thl@dev.de.cw.com> +Tim Bunce +Tom Hughes <thh@cyberscience.com> +Tony Finch <fanf@demon.net> +Ville Skyttä <ville.skytta@iki.fi> +Ward Vandewege <ward@pong.be> +William York <william@mathworks.com> +Yale Huang <yale@sdf-eu.org> +Yitzchak Scott-Thoennes <sthoenna@efn.org> @@ -0,0 +1,3889 @@ +_______________________________________________________________________________ +2015-02-14 Release 6.13 + +Karen Etheridge (1): + - fixed Makefile.PL compatibility with older ExtUtils::MakeMaker (from + release 6.12) + +_______________________________________________________________________________ +2015-02-13 Release 6.12 + +Karen Etheridge (1): + - fixed prereq declarations in release 6.11 + +_______________________________________________________________________________ +2015-02-13 Release 6.11 + +Graham Knop (1): + - cleanup of the test running mechanism; allowing greater flexibility and + should also resolve RT#102083 + +_______________________________________________________________________________ +2015-02-12 Release 6.10 + +Karen Etheridge (1): + - lower runtime prereqs recommendation on LWP::Protocol::https to suggests, + to work around a circular dependency in CPAN clients when the 'install + recommendations' option is enabled (RT#101732) + +_______________________________________________________________________________ +2015-02-09 Release 6.09 + +Steffen Ullrich (1): + - checks for EINTR now also check EWOULDBLOCK (they sometimes differ on + MSWin32) + +Daina Pettit (1): + - fixed pod syntax + +Jason A Fesler (2): + - Fixed checking the % character in address regex + - Improved regex for literal IPv6 addresses + +_______________________________________________________________________________ +2014-07-24 Release 6.08 + +Mike Schilli (1): + Requiring Net::HTTP 6.07 to fix IPv6 support + (RT#75618 and https://github.com/libwww-perl/net-http/pull/10) + +Jason A Fesler (2): + When the hostname is an IPv6 literal, encapsulate it with [brackets] + before calling Net::HTTP [rt.cpan.org #29468] + Extra steps to make sure that the host address that has a ":" contains + only characters appropriate for an IPv6 address. + +John Wittkoski (1): + Fix doc typo for cookie_jar + +_______________________________________________________________________________ +2014-07-01 Release 6.07 + +Mike Schilli (5): + Removed Data::Dump references in test suite and dependency in Makefile.PL + Added MANIFEST.SKIP to enable "make manifest". + release script now checks for MacOS to avoid incompatible tarballs + Bumped version number to 6.07 + Fixed gnu-incompatible tarball problem ([rt.cpan.org #94844]) + +_______________________________________________________________________________ +2014-04-16 Release 6.06 + +Ville Skyttä (3): + Merge pull request #44 from dsteinbrunner/master + Spelling fixes. + Merge pull request #55 from oalders/master + +Karen Etheridge (2): + Merge pull request #38 from mbeijen/typo-sting + Merge pull request #43 from dsteinbrunner/master + +David Steinbrunner (2): + Spelling corrections + +Olaf Alders (1): + Typo fixes. + +Steffen Ullrich (1): + correct behavior for https_proxy, + e.g. don't send plain https:// requests to proxy, but instead establish + CONNECT tunnel and then send requests inside tunnel. + This change does together with a change in LWP::Protocol::https. + The change supports LWP::Protocol::https with the default + IO::Socket::SSL backend, but also with Net::SSL. Also: + - proxy authorization is supported (http://user:pass@host:port as proxy + URL, Net::SSL still needs special HTTPS_PROXY_* environemt variables, + as before) + - CONNECT request does not need to be the first request inside the + tunnel (not with Net::SSL) + - conn_cache is read and written inside request(), instead of writing in + request() and reading in _new_socket(). If a https tunnel is + established the cache_key no longer depends only on proxy host,port + but also on the tunnel endpoint + - CONNECT is a proxy request and must always use Proxy-Authorization, + not Authorization header + +turugina (1): + fix: auth-header of Digest auth did not conform to + RFC 2617 when WWW-Authenticate has 'qop' parameter. + +Gisle Aas (1): + SSL libs might trigger ENOTTY on read + +Michiel Beijen (1): + Small typo. + +_______________________________________________________________________________ +2013-03-11 Release 6.05 + +Karen Etheridge (3): + Derive message from status code if it was not provided + Merge pull request #33 from tomhukins/fix-readme + fix typo in comment + +Ville Skyttä (3): + Spelling fixes. + Spelling fix. + Merge pull request #34 from berekuk/fix-github-path + +Gisle Aas (3): + Update repo URL + With Net::HTTP 6.04 we don't need our own can_read() and sysread override + $ENV{HTTP_PROXY} might override our test setup [RT#81381] + +Vyacheslav Matyukhin (1): + fix github url in perldoc + +Slaven Rezic (1): + * Pod is utf-8 + +Peter Rabbitson (1): + Match required perl in Makefile.PL + +Tom Hukins (1): + Fix Github URLs + + + + +_______________________________________________________________________________ +2012-02-18 Release 6.04 + +Gisle Aas (4): + Typo fix; envirionment [RT#72386] + Implement $ua->is_online test + Add separate option to enable the live jigsaw tests + Merge pull request #10 from trcjr/master + +Theodore Robert Campbell Jr (3): + now with put and delete helpers + updated POD + unit tests for ua->put and ua->delete + +Peter Rabbitson (1): + These modules work with 5.8.1 + + + + +_______________________________________________________________________________ +2011-10-15 Release 6.03 + +Ville Skyttä (7): + Link updates. + Attribute documentation wording improvements. + Don't parse robots.txt response content unless it's textual. + Decode robots.txt response content before attempting to parse it. + RobotUA robots.txt response parsing cleanups. + Don't parse HEAD of robots.txt responses. + Request handler doc grammar fixes. + +Gisle Aas (6): + Pass on HTTP/1.0 if set as request protocol + Remove outdated docs (not touched since 1996 :-) + Merge pull request #22 from madsen/RT67947-verify_hostname + PERL_LWP_ENV_PROXY tweaks + lwp-request didn't respect -H Content-type [RT#70488] + lwp-request -H didn't allow repeated headers + +Christopher J. Madsen (2): + verify_hostname defaults to 0 if ssl_opts provided [RT#67947] + Test verify_hostname setting + +Bryan Cardillo (1): + Fix expect header support to work with content refs. + +Moritz Onken (1): + add PERL_LWP_ENV_PROXY env variable to enable env_proxy globally + + + +_______________________________________________________________________________ +2011-03-27 Release 6.02 + +This is the release where we try to help the CPAN-toolchain be able to install +the modules required for https-support in LWP. We have done this by unbundling +the LWP::Protocol::https module from the libwww-perl distribution. In order to +have https support you now need to install (or depend on) 'LWP::Protocol::https' +and then this will make sure that all the prerequsite modules comes along. +See [RT#66838]. + +This release also removes the old http10 modules that has really been +deprecated since v5.60. These should have been removed at the v6.00 jump, but +I forgot. + + +Christopher J. Madsen (1): + Ignores env variables when ssl_opts provided [RT#66663] + +Gisle Aas (4): + Fix typo; Authen::NTLM [RT#66884] + +Yury Zavarin (1): + Support LWP::ConnCache->new(total_capacity => undef) + + + +_______________________________________________________________________________ +2011-03-09 Release 6.01 + +Add missing HTTP::Daemon dependency for the tests. + + + +_______________________________________________________________________________ +2011-03-08 Release 6.00 + +Unbundled all modules not in the LWP:: namespace from the libwww-perl +distribution. The new broken out CPAN distribtions are File-Listing, +HTML-Form, HTTP-Cookies, HTTP-Daemon, HTTP-Date, HTTP-Message, HTTP-Negotiate, +Net-HTTP, and WWW-RobotRules. libwww-perl-6 require these to be installed. + +This release also drops the unmaintained lwp-rget script from the distribution. + +Perl v5.8.8 or better is now required. For older versions of perl please stay +with libwww-perl-5.837. + +For https://... default to verified connections with require IO::Socket::SSL +and Mozilla::CA modules to be installed. Old behaviour can be requested by +setting the PERL_LWP_SSL_VERIFY_HOSTNAME environment variable to 0. The +LWP::UserAgent got new ssl_opts method to control this as well. + +Support internationalized URLs from command line scripts and in the proxy +environment variables. + +The lwp-dump script got new --request option. + +The lwp-request script got new -E option, contributed by Tony Finch. + +Protocol handlers and callbacks can raise HTTP::Response objects as exceptions. +This will abort the current request and make LWP return the raised response. + + + +_______________________________________________________________________________ +2010-09-20 Release 5.837 + +David E. Wheeler (1): + Fix for Encode 2.40 + +Gisle Aas (2): + Fix Perl syntax error in synopsis + Allow ISO 8601 date strings when parsing Apache file listings + + + +_______________________________________________________________________________ +2010-05-13 Release 5.836 + +Gisle Aas (1): + Fix problem where $resp->base would downcase its return value + + + +_______________________________________________________________________________ +2010-05-05 Release 5.835 + +Gisle Aas (12): + simple string can be simplified + Make $mess->decoded_content remove XML encoding declarations [RT#52572] + Don't allow saving to filenames starting with '.' suggested by server + Avoid race between testing for existence of output file and opening the file + Minor doc fixup -- wrongly ucfirsted word + Use decoded_content in HTTP:Response synopsis [RT#54139] + sun.com is no more. rip! + Trivial layout tweak to reduce variable scope. + Add 'make test_hudson' target + Implement alt_charset parameter for decoded_content() + Test decoding with different charset parameters + lwp-download now needs the -s option to honor the Content-Disposition header + +Ville Skyttä (9): + Make LWP::MediaTypes::media_suffix case insensitive. + Skip XML decoding tests if XML::Simple is not available. + Documentation fixes. + Fix m_media_type => "xhtml" matching. + Make parse_head() apply to data: requests. + Documentation spelling fixes. + Documentation grammar fixes. + Use $uri->secure in m_secure if available. + Fix handling of multiple (same) base headers, and parameters in them. + +Mark Stosberg (5): + Strip out empty lines separated by CRLF + Best Practice: avoid indirect object notation + Speed up as_string by 4% by having _sorted_field_names return a reference + Speed up scan() a bit. as_string() from this branch is now 6% faster + Port over as_string() optimizations from HTTP::Headers::Fast + +Tom Hukins (2): + Link to referenced documentation. + Update repository location. + +Father Chrysostomos (1): + Remove needless (and actually harmful) local $_ + +Sean M. Burke (1): + "Perl & LWP" is available online + + + +_______________________________________________________________________________ +2009-11-21 Release 5.834 + +Gisle Aas (4): + Check for sane default_headers argument [RT#50393] + Add $ua->local_address attribute [RT#40912] + Test that generation of boundary works [RT#49396] + Page does not display the "standard" apache listing any more + +Ville Skyttä (2): + Remove unneeded executable permissions. + Switch compression/decompression to use the IO::Compress/IO::Uncompress and + Compress::Raw::Zlib family of modules. + +Slaven Rezic (1): + lwp-request should use stderr for auth [RT#21620] + + + +_______________________________________________________________________________ +2009-10-06 Release 5.833 + + +Gisle Aas (5): + Deal with cookies that expire far into the future [RT#50147] + Deal with cookies that expire at or before epoch [RT#49467] + Pass separate type for https to LWP::ConnCache [RT#48899] + Improved handling of the User-Agent header [RT#48461] + HTTP::Cookies add_cookie_header previous Cookies [RT#46106] + +Andreas J. Koenig (1): + Improve diagnostics from LWP::UserAgent::mirror [RT#48869] + +Slaven Rezic (1): + mirror should die in case X-Died is set [RT#48236] + +Ville Skyttä (1): + Increase default Net::HTTP max line length to 8k. + + + +_______________________________________________________________________________ +2009-09-21 Release 5.832 + + +Ville Skyttä (6): + Fix net test suite. + Comment spelling fixes. + Fix links to old Netscape cookie specification. + Documentation spelling fixes. + Improve max line length exceeded/read error messages. + Do not warn about seemingly wellformed but unrecognized robots.txt lines. + +Gisle Aas (1): + $mess->content_charset would fail for empty content + +mschilli (1): + Further restrict what variables env_proxy() process + + + +_______________________________________________________________________________ +2009-08-13 Release 5.831 + + +Ville Skyttä (3): + Fix bzip2 content encoding/decoding. + send_te() doc grammar fix. + Document time2str() behavior with an undefined argument. + +Gisle Aas (1): + HTML::Message's content_charset trigger warnings from HTML::Parser [RT#48621] + + + +_______________________________________________________________________________ +2009-07-26 Release 5.830 + +Gisle Aas (1): + select doesn't return undef on failure [RT#32356] + +Ville Skyttä (1): + Add raw RFC 1951 deflate test case. + + + +_______________________________________________________________________________ +2009-07-07 Release 5.829 + +This release removes callback handlers that were left over on the returned +HTTP::Responses. This was problematic because it created reference loops +preventing the Perl garbage collector from releasing their memory. Another +problem was that Storable by default would not serialize these objects any +more. + +This release also adds support for locating HTML::Form inputs by id or class +attribute; for instance $form->value("#foo", 42) will set the value on the +input with the ID of "foo". + + +Gisle Aas (5): + Make the example code 'use strict' clean by adding a my + Avoid cycle in response + Clean up handlers has from response after data processing is done + Support finding inputs by id or class in HTML::Form + Test HTML::Form selectors + +Mark Stosberg (1): + Tidy and document the internals of mirror() better [RT#23450] + +phrstbrn (1): + Avoid warnings from HTML::Form [RT#42654] + + + +_______________________________________________________________________________ +2009-06-25 Release 5.828 + +A quick new release to restore compatibility with perl-5.6. + + +Gisle Aas (4): + Less noisy behaviour when we can't download the documents + Restore perl-5.6 compatibility [RT#47054] + Don't decode US-ASCII and ISO-8859-1 content + Some versions of Encode don't support UTF-16-BE [RT#47152] + +Ville Skyttä (1): + Spelling fixes. + + + +_______________________________________________________________________________ +2009-06-15 Release 5.827 + +The main news this time is better detection of what character set the document +in a response uses and the addition of the lwp-dump script that I found useful. + + +Gisle Aas (31): + Added lwp-dump script + Replace calls to $req->url with $req->uri + Also need to encode strings in the latin1 range + Ignore the value set for file inputs [RT#46911] + Add docs to lwp-dump + Don't let lwp-dump follow redirects + Support --method options + Implement the --agent option + Dictionary order for the option docs; document --method + Merge branch 'dump' + Files are passed as an array and we must not stringify it. + Add content_charset method to HTTP::Message + Start guessing the charset for a message + Let content_charset guess the charset to use for decoded_content + Specify what's missing for the XML and HTML case + Provide charset parameter for HTML::Form->parse() + Make content_charset sniff for <meta> elements specifying the charset. + Determine charset of XML documents + Get rid of the _trivial_http_get() implementation + Update the bundled media.types file + LWP::Simple::get() now returns decoded_content [RT#44435] + Implement content_type_charset method for HTTP::Headers + Implement content_is_text method for HTTP::Headers + Make use of content_is_text and content_type_charset in decoded_content + Don't let the parse_head callback append to the HTTP headers + Don't set Range header on request when max_size is used [RT#17208] + Still show client headers for internal responses + Document Client-Warning: Internal response + Don't use 'no' as example domain for no_proxy docs [RT#43728] + Drop exit from the Makefile.PL [RT#43060] + Merge branch 'content_charset' + +Alex Kapranoff (1): + Support "accept-charset" attribute in HTML::Form + +Mark Stosberg (1): + new tests for max_size and 206 responses [RT#46230] + +murphy (1): + Reformulation of Client-Warning: Internal documentation + + + +_______________________________________________________________________________ +2009-04-24 Release 5.826 + +Gisle Aas (2): + Avoid returning stale Content-Type header after message parts have been updated + Don't let content saved to file be affected by the $\ setting + +Graeme Thompson (1): + Issues around multipart boundaries [RT#28970] + +Mike Schilli (1): + Ignore random _proxy env variables, allow only valid schemes + +Slaven Rezic (1): + README.SSL is not anymore available at the linpro.no URL. + +john9art (1): + Make LWP::UserAgent constructor honor the default_headers option [RT#16637] + + + +_______________________________________________________________________________ +2009-02-16 Release 5.825 + +Zefram (1): + Fixup test failure with perl-5.8.8 and older; qr/$/m doesn't work + + + +_______________________________________________________________________________ +2009-02-13 Release 5.824 + +Gisle Aas (7): + Make format_request() ensure that it returns bytes [RT#42396] + Force bytes in all the format_* methods. + Ignore Sitemap: lines in robots.txt [RT#42420] + Refactor; use variable to hold the test port + Add redirects method to HTTP::Message + Setting $ua->max_redirect(0) didn't work [RT#40260] + Convert files to UTF-8 + +Zefram (2): + HTTP::Cookies destructor should not clobber $! and other globals. + Deal with the Encode module distributed with perl-5.8.0 + +Ian Kilgore (1): + Avoid failure if 127.0.0.1:8333 is in use [RT#42866] + +Ville Skyttä (1): + Documentation improvements, spelling fixes. + + + +_______________________________________________________________________________ +2008-12-05 Release 5.823 + +Gisle Aas (4): + Bring back the LWP::Debug code [RT#41759] + Add documentation section about 'Network traffic monitoring'. + Typo fixes + Want to ensure we get a single value back here. + + + +_______________________________________________________________________________ +2008-12-05 Release 5.822 + +Gisle Aas (4): + A more modern user_agent example. + Make it possible to unset the proxy settings again + Prefer use specified Content-Length header [RT#41462] + Deprecate LWP::Debug + + + +_______________________________________________________________________________ +2008-11-25 Release 5.821 + +Various bug fixes. + + +Gisle Aas (3): + The Content-Length and Content-MD5 headers are no longer valid after encode/decode + Add META information + croak on bad proxy args [RT#39919] + +Slaven Rezic (1): + Skip a failing decoded_content on systems without Encode.pm [RT#40735] + +Steve Hay (1): + Skip LWP test when fork() is unimplemented + +Yuri Karaban (1): + redo used incorrectly in LWP::Protocol::http [RT#41116] + +jefflee (1): + HTTP::Cookies::Microsoft now handles user names with space [RT#40917] + +ruff (1): + Avoid aborting requests saved to file early [RT#40985] + + + +_______________________________________________________________________________ +2008-11-05 Release 5.820 + +Main news is the ability to control the heuristics used to determine +the expiry time for response objects. + + +Gisle Aas (8): + Reformat later parts of Changes + Add a paragraph to summarize the motivation for releases since 5.815 + all_pod_files_ok(); + Fix POD markup error + Calculation of current_age with missing Client-Date. + The age/freshness methods now take an optional 'time' argument + More correct matching of 'max-age' in freshness_lifetime method + The freshness_lifetime method now support options to control its heuristics + + +_______________________________________________________________________________ +2008-10-20 Release 5.819 + +Release 5.815 to 5.818 had a severe bug that sometimes made LWP not +collect all data for the responses it received. This release is +strongly recommended as an upgrade for those releases. + + +Gisle Aas (2): + Don't override $Net::HTTPS::SSL_SOCKET_CLASS if it's already set. + Wrong content handlers would sometimes be skipped [RT#40187] + + +_______________________________________________________________________________ +2008-10-16 Release 5.818 + +Main news in this release is the addition of the dump() method to the +request and response objects. If found that I often ended up printing +$resp->as_string for debugging and then regretting after the terminal +got into a strange mode or just kept on scrolling for the longest +time. + + +Gisle Aas (8): + Use deflate compression instead of gzip for the test + Simplify; Get rid of the $USE_STORABLE_DCLONE configuration + Add dump method to HTTP::Message. + Use $res->dump instead of rolling our own. + Layout change; move headers() methods to a more logical place. + Add support for x-bzip2 encoding; fix bzip2 decoding. + Add send_header method to HTTP::Daemon + Make the lwp-request User-Agent string include the LWP version. + +Slaven Rezic (1): + clone implemented in terms of Storable::dclone [RT#39611] + + +_______________________________________________________________________________ +2008-10-10 Release 5.817 + +This is the release where I played around with Devel::NYTProf to +figure where time was actually spent during the processing of requests +with LWP. The methods that manipulated header objects stood out, so +this release contains a few tweaks to make those parts faster. + +I also figured a few convenience methods to encode and decode the +content of request/response objects would be in order. + + +Gisle Aas (16): + Should store "wire" headers field names with _ without translation. + Test HTTP::Request->parse(). + Restore pre-5.815 behaviour of returning "400 Bad Request" [RT#39694] + Rename the HTTP::Status constants to have HTTP_ prefix + Detection of unsupported schemes was broken [RT#37637] + Allow tainted methods to be forwarded to HTTP::Headers [RT#38736] + Add strict mode to HTML::Form + Fix test now that less warnings are generated. + Add content_is_xml method + Make split_header_words() lower case returned tokens/keys + Avoid invoking AUTOLOAD on object destruction [RT#39852] + Add decode() method to HTTP::Message + Add encode() method to HTTP::Message + Allow multiple fields to be set with push_header(). + Make content_type and content_is_xhtml methods faster + Faster push_header() + + +_______________________________________________________________________________ +2008-09-29 Release 5.816 + +Oops, release 5.815 broke download-to-file on Windows. + + +Gisle Aas (2): + Add missing binmode() [RT#39668] + Doc tweaks + + +_______________________________________________________________________________ +2008-09-24 Release 5.815 + +The main change this time is the introduction of handlers to drive the +processing of requests in LWP::UserAgent. You can also register your +own handlers for modifying and processing requests or responses on +their way, which I think is a much more flexible approach that trying +to subclass LWP::UserAgent to customize it. If we have had these +early on then the LWP::UserAgent API could have been so much simpler +as the effect of most current attributes can easily be set up with +trivial handlers. + +Also thanks to contributions by Bron Gondwana LWP's Basic/Digest +authenticate modules now registers handlers which allow them to +automatically fill in the Authorization headers without first taking +the round-trip of a 401 response when LWP knows the credentials for a +given realm. + + +Gisle Aas (23): + We don't need to build the docs to run the tests. + Style tweaks. + The jigsaw service isn't up to much good these days. + HTTP::Cookies produces warnings for undefined cookie param names [RT#38480] + Typo fix; HTTP::Message will never include x-bzip2 in Accept-Encoding [RT#38617] + Added HTTP::Config module + Add methods to configure processing handlers. + 100 Continue response not complete. + Use 3-arg open when response content to files. + Make the show_progress attribute official (by documenting it). + Start using handlers for driving the inner logic of LWP::UserAgent. + Expose the content_is_html and content_is_xhtml methods from HTTP::Headers. + Make credentials method able to get/set values. + An auth handler per realm. + Match proxy setting for request. + Set up separate handler for adding proxy authentication. + Add request_preprepare to be able to initialize proxy early enough. + Smarter get_my_handler() that can also create handlers. + Refactor; introduce run_handlers method + Pass in handler hash to the handler callback. + Don't let version=1 override behaviour if specified with a plan Set-Cookie header. + Remove handler when we don't have a username/password for the realm. + Make tests use Test.pm + +Bron Gondwana (2): + Double-check that username or password has changed after a failed login. + Update Digest Authen to subclass Basic. + +Ondrej Hanak (1): + Avoid running out of filehandles with DYNAMIC_FILE_UPLOAD. + +Todd Lipcon (1): + Fixed parsing of header values starting with ':' [RT#39367] + +amire80 (1): + Documentation typo fixes [RT#38203] + + +_______________________________________________________________________________ +2008-07-25 Release 5.814 + +Gisle Aas (13): + Typo fix. + Add HTTP::Message::decodable() + Use decoded_content in the synopsis + Avoid adding an empty first part in $mess->add_part() + Get rid of all the manual dependency tests. + Simplify the Makefile.PL (no interactivity) + Provide DELETE method in HTTP::Request::Common [RT#37481] + Checkbox picks up nearby text in description of alternates [RT#36771] + HTML::Form::possible_values() should not returned disabled values [RT#35248] + File::Listing documentation claimed only 'unix' format was supported [RT#22021] + File::Listing only support English locales [RT#28879] + Make common-req.t use Test.pm + Typo; CAN_TALK_TO_OUTSELF + +Bill Mann (1): + Fix up File::Listings fallback to dosftp [RT#23540] + +Hans-H. Froehlich (1): + File::Listing parse failure on BSD Linux based systems [RT#26724] + + +_______________________________________________________________________________ +2008-06-17 Release 5.813 + +Ville Skytta (3): + RobotUA constructor ignores delay, use_sleep [RT#35456] + Spelling fixes [RT#35457] + Add HTTP::Response->filename [RT#35458] + +Mark Stosberg (2): + Better diagnostics when the HTML::TokeParser constructor fails [RT#35607] + Multiple forms with same-named <select> parse wrongly [RT#35607] + +Gisle Aas (1): + Provide a progress method that does something that might be useful. + +Spiros Denaxas (1): + Documentation typo fix [RT#36132] + + +_______________________________________________________________________________ +2008-04-16 Release 5.812 + +Gisle Aas (6): + Typo fix. + Simplified Net::HTTP::Methods constructor call. + Croak if Net::HTTP constructor called with no argument. + Avoid calling $self->peerport to figure out what the port is. + 5.811 breaks SSL requests [RT#35090] + Make test suite compatible with perl-5.6.1. + +Toru Yamaguchi (1): + Wrong treatment of qop value in Digest Authentication [RT#35055] + + +_______________________________________________________________________________ +2008-04-14 Release 5.811 + +Gisle Aas (6): + Avoid "used only once" warning for $Config::Config. + Make HTTP::Request::Common::PUT set Content-Length header [RT#34772] + Added the add_content_utf8 method to HTTP::Message. + Typo fix. + Retry syscalls when they fail with EINTR or EAGAIN [RT#34093,32356] + Allow HTTP::Content content that can be downgraded to bytes. + +Gavin Peters (1): + HTML::Form does not recognise multiple select items with same name [RT#18993] + +Mark Stosberg (1): + Document how HTTP::Status codes correspond to the classification functions [RT#20819] + +Robert Stone (1): + Allow 100, 204, 304 responses to have content [RT#17907] + +sasao (1): + HTTP::Request::Common::POST suppressed filename="0" in Content-Disposition [RT#18887] + + +_______________________________________________________________________________ +2008-04-08 Release 5.810 + +Gisle Aas (10): + Small documentation issues [RT#31346] + Explain $netloc argument to $ua->credentials [RT#31969] + Make lwp-request honour option -b while dumping links [RT#31347] + Ignore params for date convenience methods [RT#30579] + Get rid of all the old CVS $Keyword:...$ templates. Set $VERSION to 5.810. + Update Copyright year. + Drop some sample URLs that were failing. + Complement the HTTP::Status codes [RT#29619] + Don't allow HTTP::Message content to be set to Unicode strings. + Refactor test for Encode.pm + +Ville Skytta (3): + Spelling fixes [RT#33272] + Trigger HTML::HeadParser for XHTML [RT#33271] + Escape status line in error_as_HTML, convert to lowercase [RT#33270] + +Alexey Tourbin (2): + Typo fix [RT#33843] + Protocol/file.pm: postpone load of URI::Escape and HTML::Entities [RT#33842] + +Daniel Hedlund (1): + HTML::Form Module and <button> element clicks + +Adam Kennedy (1): + HTTP::Cookies handle malformed empty Set-Cookie badly [RT#29401] + +Jacob J (1): + [HTTP::Request::Common] Does not handle filenames containing " [RT#30538] + +Rolf Grossmann (1): + Allow malformed chars in $mess->decoded_content [RT#17368] + +FWILES (1): + Croak if LWP::UserAgent is constructed with hash ref as argument [RT#28597] + +Adam Sjogren (1): + Disabled, checked radiobutton being submitted [RT#33512] + +DAVIDRW (1): + warn if TextInput's maxlength exceeded [RT#32239] + + +_______________________________________________________________________________ +2007-08-05 Gisle Aas <gisle@ActiveState.com> + + Release 5.808 + + Get rid of t/live/validator test. Too much JavaScript madness + for it to be a sane LWP test. + + + +2007-07-31 Gisle Aas <gisle@ActiveState.com> + + Release 5.807 + + Apply patch correction from CPAN RT #26152 + + More laxed t/live/validator test. + + + +2007-07-19 Gisle Aas <gisle@ActiveState.com> + + Release 5.806 + + Added progress callback to LWP::UserAgent. + + HTTP::Daemon didn't avoid content in responses to HEAD requests + + Add support for HTTP Expect header to HTTP::Daemon (CPAN RT #27933) + + Fix t/base/message.t so tests are skipped if Encode is not + installed. (CPAN RT #25286) + + Add HTML::Tagset as a prerequisite to Makefile.PL + + Do not clobber $_ in LWP::Protocol::nntp (CPAN RT #25132) + + Fix lwp-download so it can download files with an "_" in the filename + (CPAN RT#26207) + + Quiet complaints from HTML::HeadParser when dealing with undecoded + UTF-8 data. (CPAN RT#20274) + + When both IO::Socket::SSL and Net::SSL are loaded, use the latter + (CPAN RT #26152) + + Allows SSL to work much more reliably: + (CPAN RT #23372) + + Allow text/vnd.wap.wml and application/vnd.oasis.opendocument.text + in content-type field in lwp-request (CPAN RT #26151) + + Add default media type for XML in LWP::MediaTypes (CPAN RT #21093) + + Added chunked test by Andreas J. Koenig + + + +2005-12-08 Gisle Aas <gisle@ActiveState.com> + + Release 5.805 + + HTTP::Date: The str2time function returned wrong values for + years in the early 20th century, because timelocal() actually + expects the year to be provided on a different scale than what + localtime() returns. + + HTTP::Headers can now be constructed with field names that repeat. + The $h->header function now also accept repeating field + names and can also remove headers if passed undef as value. + + HTML::Form: The parse method now takes hash style optional + arguments and the old verbose behaviour is now off by default. + + HTML::Form: Accept <select multiple=""> for compatibility with + other browsers. Patch by Josh Rai <josh@rai.name>. + + HTML::Form: Sane handling of 'disabled' for ListInput. + Based on patch by Joao Lopes <developer@st3tailor.com.br>. + + HTTP::Negotiate: Fixed matching of partial language tags. + Patch contributed by Dan Kubb. + + HTTP::Response: The as_string method now returns a status line + that doesn't add the "official" code name in the message + field. This improves the ability to round-trip response objects + via HTTP::Response->parse($res->as_string) and makes the first + line of the string returned agree with $res->status_line. + + Net::HTTP: The host attribute can now be set undef in + order to suppress this header for HTTP/1.0 requests. + + Net::HTTP: The default Host: header does not include the + port number if it is the default (80 for plain HTTP). Some + servers get confused by this. + + Net::HTTP: Ignore bogus Content-Length headers. Don't get + confused by leading or trailing whitespace. + + LWP::Protocol::http: More efficient sending of small PUT/POST + requests by trying harder to pass off the whole request in a + single call to syswrite. + + lwp-request now give better error messages if you used the + -o option without having the HTML-Tree distribution installed. + Also document this dependency. + + + +2005-12-06 Gisle Aas <gisle@ActiveState.com> + + Release 5.804 + + HTTP::Message->parse did not work when the first line of the body + was something that looked like a header. + + HTTP::Header::Auth needs HTTP::Headers to be loaded before + it replace its functions. + + LWP::Protocol::nntp improvements by Ville Skyttä <ville.skytta@iki.fi>: + - Support the nntp: scheme. + - Support hostname in news: and nntp: URIs. + - Close connection and preserve headers also in non-OK responses. + - HEAD support for URIs identifying a newsgroup. + - Comment spelling fixes. + + Fix quotes in Net::HTTP example. + http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=283916 + + Detect EOF when expecting a chunk header. Should address the + warnings shown in http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286775 + + WWW::RobotRules: Improved parsing of not strictly valid robots.txt files + contributed by <Matthew.van.Eerde@hbinc.com>. + + Makefile.PL: Set name to LWP so that the .packlist ends up in the + expected place. + + + +2004-12-11 Gisle Aas <gisle@ActiveState.com> + + Release 5.803 + + HTTP::Message: $mess->decoded_content sometimes had the side + effect of destroying $mess->content. + + HTTP::Message: $mess->decoded_content failed for + "Content-Encoding: deflate" if the content was not in the + zlib-format as specified for the HTTP protocol. Microsoft got + this wrong, so we have to support raw deflate bytes as well. + + HTTP::Response->parse don't require the protocol to be + specified any more. This allows HTTP::Response->parse($resp->as_string) + to round-trip. Patch by Harald Joerg <haj@oook.m.uunet.de>. + + HTTP::Response: $resp->base might now return undef. Previously + it would croak if there was no associated request. Based on + patch by Harald Joerg <haj@oook.m.uunet.de>. + + HTML::Form now support <label> for check- and radio boxes. + Patch contributed by Dan Kubb <dan.kubb@onautopilot.com>. + + Make HTTP::Daemon subclassable, patch by Kees Cook <kees@osdl.org>. + + lwp-download allow directory to save into to be specified. + Patch by Radoslaw Zielinski <radek@karnet.pl>. + + lwp-download will validate filename derived from server + controlled data and will fail if something looks not + quite right. + + + +2004-11-30 Gisle Aas <gisle@ActiveState.com> + + Release 5.802 + + The HTTP::Message object now has a decoded_content() method. + This will return the content after any Content-Encodings and + charsets have been decoded. + + Compress::Zlib is now a prerequisite module. + + HTTP::Request::Common: The POST() function created an invalid + Content-Type header for file uploads with no parameters. + + Net::HTTP: Allow Transfer-Encoding with trailing whitespace. + <http://rt.cpan.org/Ticket/Display.html?id=3929> + + Net::HTTP: Don't allow empty content to be treated as a valid + HTTP/0.9 response. + <http://rt.cpan.org/Ticket/Display.html?id=4581> + <http://rt.cpan.org/Ticket/Display.html?id=6883> + + File::Protocol::file: Fixup directory links in HTML generated + for directories. Patch by Moshe Kaminsky <kaminsky@math.huji.ac.il>. + + Makefile.PL will try to discover misconfigured systems that + can't talk to themselves and disable tests that depend on this. + + Makefile.PL will now default to 'n' when asking about whether + to install the "GET", "HEAD", "POST" programs. There has been + too many name clashes with these common names. + + + +2004-11-12 Gisle Aas <gisle@ActiveState.com> + + Release 5.801 + + HTTP::Message improved content/content_ref interaction. Fixes + DYNAMIC_FILE_UPLOAD and other uses of code content in requests. + + HTML::Form: + - Handle clicking on nameless image. + - Don't let $form->click invoke a disabled submit button. + + HTTP::Cookies could not handle a "old-style" cookie named + "Expires". + + HTTP::Headers work-around for thread safety issue in perl <= 5.8.4. + + HTTP::Request::Common improved documentation. + + LWP::Protocol: Check that we can write to the file specified in + $ua->request(..., $file) or $ua->mirror. + + LWP::UserAgent clone() dies if proxy was not set. Patch by + Andy Lester <andy@petdance.com> + + HTTP::Methods now avoid "use of uninitialized"-warning when server + replies with incomplete status line. + + lwp-download will now actually tell you why it aborts if it runs + out of disk space of fails to write some other way. + + WWW::RobotRules: only display warning when running under 'perl -w' + and show which robots.txt file they correspond to. Based on + patch by Bill Moseley. + + WWW::RobotRules: Don't empty cache when agent() is called if the + agent name does not change. Patch by Ville Skyttä <ville.skytta@iki.fi>. + + + +2004-06-16 Gisle Aas <gisle@ActiveState.com> + + Release 5.800 + + HTML::Form will allow individual menu entries to be disabled. + This was needed to support <input type=radio disabled value=foo> + and <select><option disabled>foo. + + HTML::Form now avoids name clashes between the <select> and + <option> attributes. + + HTML::Form now implicitly close <select> elements when it sees + another input or </form>. This is closer to the MSIE behaviour. + + HTML::Form will now "support" keygen-inputs. It will not + calculate a key by itself. The user will have to set its + value for it to be returned by the form. + + HTTP::Headers now special case field names that start with a + ':'. This is used as an escape mechanism when you need the + header names to not go through canonicalization. It means + that you can force LWP to use a specific casing and even + underscores in header names. The ugly $TRANSLATE_UNDERSCORE + global has been undocumented as a result of this. + + HTTP::Message will now allow an external 'content_ref' + to be set. This can for instance be used to let HTTP::Request + objects pick up content data from some scalar variable without + having to copy it. + + HTTP::Request::Common. The individual parts will no longer + have a Content-Length header for file uploads. This improves + compatibility with "normal" browsers. + + LWP::Simple doc patch for getprint. + Contributed by Yitzchak Scott-Thoennes <sthoenna@efn.org>. + + LWP::UserAgent: New methods default_header() and + default_headers(). These can be used to set up headers that + are automatically added to requests as they are sent. This + can for instance be used to initialize various Accept headers. + + Various typo fixes by Ville Skyttä <ville.skytta@iki.fi>. + + Fixed test failure under perl-5.005. + + LWP::Protocol::loopback: This is a new protocol handler that + works like the HTTP TRACE method, it will return the request + provided to it. This is sometimes useful for testing. It can + for instance be invoked by setting the 'http_proxy' environment + variable to 'loopback:'. + + + +2004-04-13 Gisle Aas <gisle@ActiveState.com> + + Release 5.79 + + HTML::Form now exposes the 'readonly' and 'disabled' + attribute for inputs. This allows your program to simulate + JavaScript code that modifies these attributes. + + RFC 2616 says that http: referer should not be sent with + https: requests. The lwp-rget program, the $req->referer method + and the redirect handling code now try to enforce this. + Patch by Ville Skyttä <ville.skytta@iki.fi>. + + WWW::RobotRules now look for the string found in + robots.txt as a case insensitive substring from its own + User-Agent string, not the other way around. + Patch by Ville Skyttä <ville.skytta@iki.fi>. + + HTTP::Headers: New method 'header_field_names' that + return a list of names as suggested by its name. + + HTTP::Headers: $h->remove_content_headers will now + also remove the headers "Allow", "Expires" and + "Last-Modified". These are also part of the set + that RFC 2616 denote as Entity Header Fields. + + HTTP::Headers: $h->content_type is now more careful + in removing embedded space in the returned value. + It also now returns all the parameters as the second + return value as documented. + + HTTP::Headers: $h->header() now croaks. It used to + silently do nothing. + + HTTP::Headers: Documentation tweaks. Documented a + few bugs discovered during testing. + + Typo fixes to the documentation all over the place + by Ville Skyttä <ville.skytta@iki.fi>. + + Updated tests. + + + +2004-04-07 Gisle Aas <gisle@ActiveState.com> + + Release 5.78 + + Removed stray Data::Dump reference from test suite. + + Added the parse(), clear(), parts() and add_part() methods to + HTTP::Message. The HTTP::MessageParts module of 5.77 is no more. + + Added clear() and remove_content_headers() methods to + HTTP::Headers. + + The as_string() method of HTTP::Message now appends a newline + if called without arguments and the non-empty content does + not end with a newline. This ensures better compatibility with + 5.76 and older versions of libwww-perl. + + Use case insensitive lookup of hostname in $ua->credentials. + Patch by Andrew Pimlott <andrew@pimlott.net>. + + + +2004-04-06 Gisle Aas <gisle@ActiveState.com> + + Release 5.77 + + LWP::Simple did not handle redirects properly when the "Location" + header used uncommon letter casing. + Patch by Ward Vandewege <ward@pong.be>. + + LWP::UserAgent passed the wrong request to redirect_ok(). + Patch by Ville Skyttä <ville.skytta@iki.fi>. + https://rt.cpan.org/Ticket/Display.html?id=5828 + + LWP did not handle URLs like http://www.example.com?foo=bar + properly. + + LWP::RobotUA construct now accept key/value arguments in the + same way as LWP::UserAgent. + Based on patch by Andy Lester <andy@petdance.com>. + + LWP::RobotUA did not parse robots.txt files that contained + "Disallow:" using uncommon letter casing. + Patch by Liam Quinn <liam@htmlhelp.com>. + + WWW::RobotRules now allow leading space when parsing robots.txt + file as suggested by Craig Macdonald <craig@freeasphost.co.uk>. + We now also allow space before the colon. + + WWW::RobotRules did not handle User-Agent names that use complex + version numbers. Patch by Liam Quinn <liam@htmlhelp.com>. + + Case insensitive handling of hosts and domain names + in HTTP::Cookies. + https://rt.cpan.org/Ticket/Display.html?id=4530 + + The bundled media.types file now match video/quicktime + with the .mov extension, as suggested by Michel Koppelaar + <Michel.Koppelaar@kb.nl>. + + Experimental support for composite messages, currently + implemented by the HTTP::MessageParts module. Based on + ideas from Joshua Hoblitt <jhoblitt@ifa.hawaii.edu>. + + Fixed libscan in Makefile.PL. + Patch by Andy Lester <andy@petdance.com>. + + The HTTP::Message constructor now accept a plain array reference + as its $headers argument. + + The return value of the HTTP::Message as_string() method now + better conforms to the HTTP wire layout. No additional "\n" + are appended to the as_string value for HTTP::Request and + HTTP::Response. The HTTP::Request as_string now replace missing + method or URI with "-" instead of "[NO METHOD]" and "[NO URI]". + We don't want values with spaces in them, because it makes it + harder to parse. + + + +2003-11-21 Gisle Aas <gisle@ActiveState.com> + + Release 5.76 + + Revised handling of redirects. + - clear our content and content headers if we + rewrite request as GET based on patch by + Steven Butler <stevenb@kjross.com.au>. + - pass $response to redirect_ok() + + Support cpan:-URLs. Try 'lwp-download cpan:src/latest.tar.gz' :) + + Fix test failure in 't/html/form.t' for perl5.005. + + + +2003-10-26 Gisle Aas <gisle@ActiveState.com> + + Release 5.75 + + Reworked LWP::UserAgent, HTTP::Request and HTTP::Response + documentation. Also other documentation tweaks. + + + +2003-10-23 Gisle Aas <gisle@ActiveState.com> + + Release 5.74 + + Improved lwp-download program: + - set mtime if Last-Modified header reported by server + - better prompts + - avoid warnings when aborted at the wrong time + + Collected all contributions in the AUTHORS file and + also added an AUTHORS section to the LWP manpage. + + Style tweaks to all modules. Move POD after __END__ + and uncuddled elses. + + + +2003-10-19 Gisle Aas <gisle@ActiveState.com> + + Release 5.73 + + Takanori Ugai <ugai@jp.fujitsu.com> found that 'max_redirect' + introduced in 5.72 was broken and provided a patch for that. + + Not all ftp servers return 550 responses when trying to + to RETR a directory. Microsoft's IIS is one of those. + Patch provided by Thomas Lotterer <thl@dev.de.cw.com>. + + Some documentation tweaks. + + + +2003-10-15 Gisle Aas <gisle@ActiveState.com> + + Release 5.72 + + Requests for some non-HTTP URLs would fail if the cookie_jar + was enabled. The HTTP::Cookies::add_cookie_header now ignore + non-HTTP requests. + + The new local/http test failed on Windows because of a missing + binmode(). + + Suppress Client-SSL-Warning warning header when Crypt::SSLeay + is able to verify the peer certificate. Patch contributed by + Joshua Chamas <joshua@chamas.com>. + + HTTP::Request::Common::POST did not add a 'Content-Length' header + when the content ended up empty. Fixed by a patch contributed + by Brian J. Murrell. + + Internally generated responses now contain a text/plain part + that repeats the status line. They also have a "Client-Warning" + header that can be used to differentiate these responses from + real server responses. + + LWP::UserAgent now deals with 303 and 307 redirects. The behaviour + of 302 redirects has also changed to be like 303; i.e. change the + method to be "GET". This is what most browsers do. Based on + a patch contributed by Tom Hughes <thh@cyberscience.com>. + + LWP::UserAgent now implements a 'max_redirect' attribute with a + default value of 7. This should also fix the problem where + redirects to the same URL to get a cookie set did not work. + Based on a patch by Sean M. Burke <sburke@cpan.org>. + + NTLM authentication should continue to fail if the Authen::NTLM + module can't be loaded. LWP used to think the scheme was + available as soon as the module stash showed up. Not it looks + for the authenticate method to be defined. Patch by Jan Dubois. + + lwp-download will not try to rename *.tbz and *.tar.bz2 to + match the reported content type. Patch contributed by + Robin Barker <Robin.Barker@npl.co.uk>. + + HTTP::Cookies::Netscape documentation fix by Sean M. Burke. + + HTTP::Cookies::Microsoft documentation fix by Johnny Lee. + + The code that tries to avoid installing 'HEAD' on top of + 'head' on systems like Mac OS was further improved to look + in $Config{installscript} instead of $Config{sitebin}. + Patch provided by Ken Williams <ken@mathforum.org>. + + + +2003-10-14 Gisle Aas <gisle@ActiveState.com> + + Release 5.71 + + Support open-ended Range: header for ftp requests. + Patch by Matthew Eldridge <eldridge@Graphics.Stanford.EDU>. + + lwp-request now prints unsuccessful responses in the same way + as successful ones. The status will still indicate failures. + Based on a patch by Steve Hay <steve.hay@uk.radan.com>. + + HTML::Form's dump now also print alternative value names. + + HTML::Form will now pick up the phrase after a <input type=radio> + or <input type=checkbox> and use that as the name of the checked + value. + + HTML::Form's find_input now returns all inputs that match in + array context. Based on patch by Mark Stosberg <markstos@cpan.org> + in <http://rt.cpan.org/Ticket/Display.html?id=3320>. + + HTTP::Daemon's send_file() method did not work when given + a file name. Patch by Dave W. Smith <dws@postcognitive.com>. + + HTTP::Daemon is less strict about field names in the request + headers is received. The Norton Internet Security firewall + apparently likes to send requests with a header called + '~~~~~~~~~~~~~~~'. Further details in + <http://rt.cpan.org/Ticket/Display.html?id=2531>. + + LWP::Protocol::http assumed $1 would be meaningful without + testing the outcome of the match. This sometimes produced + an extra garbage Authentication header. + Based on the patch by <bai@dreamarts.co.jp> in + <http://rt.cpan.org/Ticket/Display.html?id=1994>. + + LWP::Protocol::mailto will try harder to locate the sendmail + program as suggested in <http://rt.cpan.org/Ticket/Display.html?id=2363>. + Also let $ENV{SENDMAIL} override the search. + + Patch to enable OS/2 build by Ilya Zakharevich. + + + +2003-10-13 Gisle Aas <gisle@ActiveState.com> + + Release 5.70 + + File::Listing::apache by Slaven Rezic <slaven@rezic.de> + + HEAD requests now work properly for ftp: URLs. + Patch by Ville Skyttä <ville.skytta@iki.fi>. + + LWP::UserAgent: The protocols_allowed() and protocols_forbidden() + methods are now case insensitive. Patch by Ville Skyttä + <ville.skytta@iki.fi>. + + Avoid warning from HTTP::Date on certain invalid dates. + Patch by David Dick <david_dick@iprimus.com.au>. + + HTML::Form::param() is an alternative interface for inspecting + and modifying the form values. It resembles the interface + of CGI. + + HTML::Form documentation updated. Lots of typo fixes and improves + by Martin Thurn <mthurn@northropgrumman.com>. + + HTML::Form will treat any unknown input types as text input. + This appears to be what most browsers do. + + HTML::Form::parse() can now take a HTTP::Response object + as argument. + + The "checkbox" and "option" inputs of HTML::Form can now be + turned on with the new check() method. + + The inputs of HTML::Form can now track alternative value + names and allow values to be set by these names as well. + Currently this is only supported for "option" inputs. + + HTML::Form's dump() method now print the name of the form if + present. + + + +2003-01-24 Gisle Aas <gisle@ActiveState.com> + + Release 5.69 + + Include lwptut.pod contributed by Sean M. Burke C<sburke@cpan.org>. + + The lwp-request aliases GET, HEAD, POST where installed when + no program should be. Fixed by David Miller <dave@justdave.net>. + + lwp-rget --help don't print double usage any more. + + HTTP::Header::Util is now more reluctant to put quotes around + token values. + + Net::HTTP: Avoid warning on unexpected EOF when reading chunk + header. + + + +2003-01-02 Gisle Aas <gisle@ActiveState.com> + + Release 5.68 + + Fix test failure for t/html/form.t when running under + perl-5.8.0. + + + +2003-01-01 Gisle Aas <gisle@ActiveState.com> + + Release 5.67 + + HTTP::Cookies::Microsoft contributed by Johnny Lee <typo_pl@hotmail.com>. + This module makes it possible for LWP to share MSIE's cookies. + + HTML::Form supports file upload better now. There are some + new methods on that kind of input; file(), filename(), content() + and headers(). + + Removed unfinished test that depended on Data::Dump. + + Net::HTTP avoids exceptions in read_response_headers() with + laxed option. It now always assumes HTTP/0.9 on unexpected + responses. + + HTML::Form documentation reworked. + + + +2002-12-20 Gisle Aas <gisle@ActiveState.com> + + Release 5.66 + + Various patches from Sean M. Burke. Most of them to + match up LWP with the "Perl & LWP" book. + + LWP::DebugFile module contributed by Sean. + + LWP::Authen::Ntml contributed by James Tillman. + + HTTP::Daemon patch for Alpha by <shildreth@emsphone.com> + + The format_chunk() and write_chunk() methods of Net::HTTP + did not work. Bug spotted by Yale Huang <yale@sdf-eu.org>. + + The Client-Peer response header is back. + + + +2002-05-31 Gisle Aas <gisle@ActiveState.com> + + Release 5.65 + + Make HTTP::Date compatible with perl 5.8. + + Try to avoid to default to overwriting /usr/bin/head + on MacOS X when the perl install prefix is /usr/bin. + + HTTP::Cookies fix for parsing of Netscape cookies file + on MS Windows. Patch by by Sean M. Burke <sburke@cpan.org>. + + HTTP::Negotiate doc patch from Edward Avis <epa98@doc.ic.ac.uk>. + + + +2002-02-09 Gisle Aas <gisle@ActiveState.com> + + Release 5.64 + + Simplified the Makefile.PL: + - the scripts are not longer *.PL files + + - don't try to make symlinks for GET, HEAD, POST + as that has not worked for a long time + + - the GET, HEAD, POST aliases for lwp-request should + now work on Windows. + + HTTP::Cookies: + - added 'clear_temporary_cookies' method; + patch by Mike Schilli <schilli1@pacbell.net>. + + - trailing space in old cookie parameters not ignored; + patch by Ivan Panchenko + + - protect against $SIG{__DIE__} handlers; + patch by Adam Newby <adam@NewsNow.co.uk>. + + LWP::Authen::Digest: + - abort digest auth session if we fail repeatedly with + the same username/password. + + MacOS portability patches to the test suite by + Chris Nandor <pudge@pobox.com>. + + + +2001-12-14 Gisle Aas <gisle@ActiveState.com> + + Release 5.63 + + HTTP::Negotiate: Deal with parameter names in a case + insensitive way. Put a little weight on the order of features + in the Accept headers. + + LWP::UserAgent: make sure that the 'request' attribute is + always set on the returned response. + + LWP::Protocol::http will now allow certain bad headers + in the responses it reads. The bad headers end up in the + header 'Client-Junk'. + + Net::HTTP new options to the 'read_response_headers' + method. The option 'laxed' will make it ignore bad header + lines. The option 'junk_out' can be used to pass in an + array reference. Junk header lines are pushed onto it. + + Net::HTTP::Methods: fixed the internal zlib_ok() to also + return the correct value the first time. + + LWP::Protocol::http: Ensure that we don't read until + select has said it is ok since we have put the socket + in non-blocking mode. Previously this could happen if + you set the 'timeout' attribute of the user agent to 0. + + LWP::Authen::Digest now use Digest::MD5 instead of MD5. + + Some adjustments to Makefile.PL to figure out if + Compress::Zlib is available and adjust the test suite + accordingly. + + + +2001-11-21 Gisle Aas <gisle@ActiveState.com> + + Release 5.62 + + The $VERSION of LWP::UserAgent wrapped around. This confused the + CPAN indexer. Bumped the major number to 2 to fix this. + + Net::HTTP did not work well on perl5.003. The PeerHost argument + to IO::Socket::INET was not recognized, so we had to use PeerAddr + instead. The syswrite() method also required a length argument. + + Net::HTTP did not deal with transfer encoding tokens in a + case-insensitive way. Patch by Blair Zajac <blair@orcaware.com>. + + The jigsaw-chunk test failed on MacOS because "\n" is different. + Patch by Chris Nandor <pudge@pobox.com>. + + + +2001-11-16 Gisle Aas <gisle@ActiveState.com> + + Release 5.61 + + LWP::Protocol::http did not invoke its _fixup_header method. + The result was that the 'Host' header got completely wrong + when talking through a proxy server. + + The live Google test is gone. They blocked us. + + The guts of Net::HTTP has moved to Net::HTTP::Methods. + + Net::HTTP now has limits on the size of the header which are + set by default. + + New module Net::HTTPS. + + Documentation tweaks. + + HTTP::Headers: The 'remove_header' method now return the values + of the fields removed as suggested by Blair Zajac <blair@orcaware.com>. + Also a typo fix by Blair. + + HTTP::Message: The delegation via &AUTOLOAD should be slightly + faster now. It will install a real forwarding function the + first time it is called for each HTTP::Headers method. + + LWP::UserAgent: Don't forward 'Cookie' headers on redirect. + Patch by Steve A Fink <steve@fink.com>. + + LWP::Protocol::http has been reorganized to make it simpler + to subclass it. Other minor changes to it include: + - Client-Warning is gone + - Client-Request-Num renamed to Client-Response-Num + - A 'Transfer-Encoding' header is rewritten into a + 'Client-Transfer-Encoding' header. + + LWP::Protocol::https is completely redone. + + + +2001-10-26 Gisle Aas <gisle@ActiveState.com> + + Release 5.60 + + Made HTTP/1.1 the default. The old HTTP/1.0 module has been + renamed as LWP::Protocol::http10. There is an environment + variable; PERL_LWP_USE_HTTP_10 that can be set to have LWP + still pick up the old drivers. + + Deal with "100 continue" responses even when not requested by + and Expect header in the request. MS IIS seems to eager to send + this kind of response. + + For HTTP/1.1 over SSL there was a problem with the underlying + SSL libraries if the socket was configured to non-blocking mode. + Disable this for https. + Based on a patch from Michael Thompson <mickey@berkeley.innomedia.com> + + Support the Range header for ftp:// requests. + Patch by David Coppit <david@coppit.org>. + + Rearrange Bundle::LWP on request from Chris Nandor. + + HTTP::Cookies: Allow a domain like .foo.com match host "foo.com". + Patch by Alexandre Duret-Lutz <duret_g@lrde.epita.fr> + + For redirects make sure Host header is not copied to the new + request. + + The HTML::HeadParser is not loaded until actually needed. + + Net::HTTP should now work with perl5.005 by a simple tweak + to 'require IO::Socket::INET'. + + WWW::RobotRules::AnyDBM: Explicitly clear database on open. + Some DBM implementations doesn't support the O_TRUNC flag + properly. Patch by Radu Greab <radu@netsoft.ro>. + + + +2001-09-19 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_97 + + LWP::Protocol::http11: fix socket leak. Because we managed + to set up a circular reference within the sockets objects they + stayed around forever. + + LWP::UserAgent: Split up simple_request into prepare_request + and send_request. Patch contributed by Keiichiro Nagano <knagano@sodan.org> + + LWP::Protocol::http: Pass all header data to LWP::Debug::conns. + Based on patch by Martijn. + + LWP::UserAgent: Sean fixed a Cut&Paste error. + + HTTP::Cookies: avoid pack("c",...) warning from bleadperl. + + + +2001-08-27 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_96 + + HTTP/1.1 support also for https. + Contributed by Doug MacEachern <dougm@covalent.net> + + The HTTP/1.1 modules are now enabled by default. Hope that will give + them more testing than they otherwise would have gotten. + + HTTP::Daemon's accept now has same behaviour as IO::Socket's + accept in list context. Fixed by Blair Zajac <blair@gps.caltech.edu>. + + More argument sanity checking in HTTP::Request->uri and + LWP::UserAgent->simple_request. Patch by Sean M. Burke. + + HTTP::Protocol::http. Deal with short writes. + Patch by Norton Allen <allen@huarp.harvard.edu> + + HTTP::Protocol::http11: Deal with newlines in header values. + + Net::HTTP: call sysread (instead of xread) when more data is required. + + + +2001-08-06 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_95 + + Fix HTTP::Cookies where there is a mix of Set-Cookie and + Set-Cookie2 headers. In that case we used to ignore all Set-Cookie + headers. Now we only ignore those Set-Cookie headers that reference + the same cookie as a Set-Cookie2 header. + + HTTP::Request, HTTP::Response will by default now use "URI" class, + instead of "URI::URL", when constructing its URI objects. This + has a potential for breaking existing code as URI::URL objects had + some extra methods that external code might depend upon. + + Patches by Sean M. Burke: + - Fix treatment of language tags in HTTP::Negotiate + - Avoid trailing newline in $response->message + - HTTP::Response clarifications + + LWP::Simple deals with non-absolute redirects "correctly" now. + + Net::HTTP does not try to load Compress::Zlib until it is needed. + + Net::HTTP documentation has been updated. + + + +2001-05-05 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_94 + + Sean M. Burke's update to LWP::UserAgent: + - updated redirect_ok behaviour + - new convenience methods: get/head/post/put + - protocols_allowed/protocols_forbidden + - LWP::Protocol::nogo (new module) + + Added digest auth test against Jigsaw + + Fixed a 'use of uninitialized'-warning in the handling of + digest authentication. + + Net::HTTP updates: + - new option: SendTE + - support transfer-encoding 'deflate' and 'gzip' (when Compress::Zlib + is available). + - new methods: format_chunk, format_chunk_eof + - use -1 (instead of "0E0" as signal that no data was available, + but this was not EOF). + + + +2001-04-28 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_93 + + Makefile.PL now asks some questions + + Added live tests for the new HTTP/1.1 support + + LWP::MemberMixin: make it possible to set a value to the 'undef' value. + + Net::HTTP: + - transparent support for 'deflate' and 'gzip' transfer encodings + (need to have the Compress::Zlib module installed for this to work). + + + +2001-04-25 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_92 + + LWP::Protocol::ftp now support keep-alives too. The command + connection will stay open if keep-alives are enabled. + + LWP::Protocol::http11 various fixes: + - chunked request content did not work + - monitor connection while sending request content + - deal with Expect: 100-continue + + LWP::RobotUA: Protect host_port call. Not all URIs have this method. + + + +2001-04-20 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_91 + + Introduced LWP::ConnCache module. Works similar to HTTP::Cookies, + it that it takes effect if associated with the $ua. + + The handling of $ua->max_size changed to make 0 mean 0 + (not unlimited). A value of undef means no limit. + The X-Content-Base header is gone. I hope nobody relies on + it. It might come back if people start to scream. There + is a new Client-Aborted header instead. + + The Range header generated for $ua->max_size had an off-by-one + error. A range of "0-1" means 2 bytes. + + The LWP::UserAgent constructor now takes configuration arguments. + + Keep-alive and the new HTTP/1.1 module can now be simply + enabled with something like: + + LWP::UserAgent->new(keep_alive => 1); + + New method $ua->conn_cache to set up and access the associated + connection manager. + + If the string passed to $ua->agent() ends with space then + the "libwww-perl/#.##" string is automatically appended. + + New method $ua->_agent + + Passing a plain hash to $ua->cookie_jar automatically loads + HTTP::Cookies and initialise an object using the hash content + as constructor arguments. + + LWP::Protocol::http11 now use the conn_cache of the $ua. + + LWP::Protocol::http11 now added a few new Client- headers. + + LWP::Protocol avoid keeping the connection alive if $ua->max_size + limit prevents the whole body content from being read. + + Net::HTTP updates: + - new methods: write_chunk(), write_chunk_eof() + - reset state properly when a new body is read. + - always set read buffer empty on eof + - doc update + + WWW::RobotRules patch by Liam Quinn <liam@htmlhelp.com>: + - Always qualify netloc with port. + - Reverse User-Agent substring matching. + + + +2001-04-18 Gisle Aas <gisle@ActiveState.com> + + Release 5.53_90 + + Note: This is a developer only release. Not for production use. + + LWP::Protocol::http11 now does keep-alives by default. Still need + to figure out what interface to provide at the $ua level. + + LWP::Protocol::http11 deals with CODE content in request. + + Net::HTTP updated: + - added format_request() method + - added _rbuf and _rbuf_length methods + - read_response_headers does not return protocol version + any more. + - peer_http_version method did not work because of typo. + - documentation added + + New module Net::HTTP::NB. This is a Net::HTTP subclass that + is better suited for multiplexing as it is able to do no-blocking + reads of headers and entity body chunks. + + HTTP::Request: Protect $request->uri against evil $SIG{__DIE__} handlers. + + Some reorganisation in how stuff is passed from $ua to protocol object. + The $ua is now passed in so protocol objects might store start in it. + + The $ua->max_size default is now 0. + + The $ua->clone method avoids sharing of proxy settings between + the old and the new. + + This file is renamed to 'Changes' (used to be 'ChangeLog'). + + + +2001-04-10 Gisle Aas <gisle@ActiveState.com> + + Release 5.53 + + LWP::Simple::get() could sometimes return nothing on failure in + list context. Now it always returns 'undef'. + + HTTP::Cookies does not request 2 dots on domain names any more. + New option to hide the Cookie2 header. Cookie2 header now quote + the version number. Updated reference to RFC 2965. + + Support for embedded userinfo in http proxy URIs. It means that + you know can set up your proxy with things like: + http_proxy="http://proxyuser:proxypass@proxyhost:port" + Patch by John Klar <j.klar@xpedite.com>. + + Experimental HTTP/1.1 support. New module called Net::HTTP that + provide the lower level interface and a LWP::Protocol::http11 + module that builds on it. The HTTP/1.1 protocol module must be + loaded and registered explicitly, otherwise the old and trustworthy + HTTP/1.0 module will be used. + + LWP::Protocol::GHTTP will try to use the get_headers() methods + so that it can actually extract all the headers. + + + +2001-03-29 Gisle Aas <gisle@ActiveState.com> + + Release 5.52 + + HTTP::Header: new method $h->init_header() that will only + set the header if it is not already set. Some shuffling + around in the code. + + LWP::UserAgent will not override 'User-Agent', 'From' + or 'Range' headers if they are explicitly set in the + request passed in. + + HTML::Form tries to optimize parsing be restricting the + tags that are reported by HTML::Parser. Will need + HTML::Parser v3.19_93 or better for this to actually + have any effect. + + LWP::Protocol::ftp now deals with path parameters again. + It means that you can append ";type=a" to ftp-URI and + it will download the document in ASCII mode. + + If the server output multiple Location headers on a redirect, + ignore all but the first one. + + Extract cookies failed on request URIs with empty paths. + This was only triggered if you used URI objects directly in + scripts. + + This change was actually part of 5.51: Fix qop="auth" + handling for Digest authentication. + Patch by Dave Dunkin <dave_dunkin@hotmail.com>. + + + +2001-03-14 Gisle Aas <gisle@ActiveState.com> + + Release 5.51 + + SECURITY FIX: If LWP::UserAgent::env_proxy is called in a CGI + environment, the case-insensitivity when looking for "http_proxy" + permits "HTTP_PROXY" to be found, but this can be trivially set by the + web client using the "Proxy:" header. The fix applied is that + $ENV{HTTP_PROXY} is not longer honored for CGI scripts. + The CGI_HTTP_PROXY environment variable can be used instead. + Problem reported by Randal L. Schwartz. + + NOTE: It is recommended that everybody that use LWP::UserAgent + (including LWP::Simple) in CGI scripts upgrade to this release. + + Explicit setting of action on HTML::Form had no effect because + of a code typo. Patch by BooK <book@netcourrier.com>. + + HTTP::Daemon: The CONNECT method need special treatment because + it does not provide a URI as argument (just a "hostname:port"). + The non-upward compatibility warning is that you must now call + $request->url->host_port to get the host/port string for CONNECT, + rather than calling $request->url and using the entire string. + Based on patch from Randal L. Schwartz <merlyn@stonehenge.com> + + HTTP::Daemon: Create self URL based on $self->sockaddr. This works + better when LocalAddr is used to specify the port number. Based on + patch from Ben Low <ben@snrc.uow.edu.au>. + + Avoid potential '<FILE> chunk 1' messages at the end of the response + 'message'. + + + +2001-01-12 Gisle Aas <gisle@ActiveState.com> + + Release 5.50 + + Fix for test cases that failed because of URI-1.10 now encode + space as '+' instead of '%20. Patch by Christian Gilmore + <cgilmore@tivoli.com>. + + Makefile.PL: Require URI-1.10. + + HTTP::Daemon now accepts any non-space character as method name + on the request line. It used to fail on methods like "M-POST" + because it only allowed \w-chars. + + HTTP::Date now allow fractional seconds in ISO date formats. + Based on patch from Mark D. Anderson <mda@discerning.com> + + HTTP::Request::Common will now calculate Content-length + even if $DYNAMIC_FILE_UPLOAD is set. Patch provided by + Lindley, Lee T <Lee.Lindley@viasystems.com>. + + + +2000-12-31 Gisle Aas <gisle@ActiveState.com> + + Release 5.49 + + HTML::Form: Use croak() instead of die. Implement + $f->possible_values. Avoid use of undef value warnings. + + HTTP::Cookies: fix epath issue. Make it work for URI::http + as the uri-attribute of HTTP::Request object + + HTTP::Date: Allow ignored timezone info in parenthesis. Patch + by Sander van Zoest <sander@covalent.net>. + Fix calculation of non-GMT timezones (wrong sign). Patch by + KONISHI Katsuhiro <konishi@din.or.jp>. + + HTTP::Response: Let $res->base() absolutize the URI. Based on + bug report from Hans de Graaff <hans@degraaff.org>. + + Fixed minor doc typos in HTTP::Headers::Util and LWP::UserAgent. + + HTTP::Request::Common: Support boundary spec from client. + + LWP::Simple: Avoid trivial_http_get when @ appears in authority + part of URI + + LWP::Authen::Digest: Need to have query in URI param. + Spotted by <ronald@innovation.ch>. + + LWP::Protocol::http: unescape username/password if they are + specified in the URI. + + Added LWP::Protocol::GHTTP. This allow LWP to use the HTTP::GHTTP + module as the low level HTTP driver. + + + +2000-04-09 Gisle Aas <gisle@aas.no> + + Release 5.48 + + README.SSL update by Marko Asplund <aspa@hip.fi> + + Added cookie example to lwpcook.pod + + HTTP::Date::str2time returns undef on failure instead + of an empty list as suggested by Markus B Krüger <markusk@pvv.org> + + $request->uri($uri) will now always store a copy of the $uri. + + HTTP::Status: Added status codes from RFC 2616 and RFC 2518 (WebDAV) + + LWP::RobotUA will not parse robots.txt unless content type and + content sample looks right. + + LWP::UserAgent: Deal with multiple WWW-Authenticate headers. + Patch by Hugo <hv@crypt.compulink.co.uk> + + $ua->proxy can now return the old proxy settings without + destroying the old one. + Based on patch by Benjamin Low <ben@snrc.uow.edu.au> + + LWP::Protocol::http update + + - don't terminate header parsing on bad headers + - extra_sock_opts + - preparations for keep alive support + - method CONNECT + + WWW::RobotRules deal with various absolute URIs in the + disallow lines. + + Makefile.PL: Make sure we have HTML::TokeParser + + Clean test on VMS. + Patch by Charles Lane <lane@ DUPHY4.Physics.Drexel.Edu>. + + + +1999-11-16 Gisle Aas <gisle@aas.no> + + o Release 5.47 + + o Added HTML::Form to the distribution. + + o LWP::Protocol::ftp: Make it URI.pm compatible. We broke it in 5.45. + + o LWP::Protocol::http: Kill any Connection header + + o LWP::MediaTypes: Fixed builtin html/text mapping. + Added bz2 to suffixEncoding + + + +1999-10-28 Gisle Aas <gisle@aas.no> + + o Release 5.46 + + o Updated mailing list address + + o Avoid warnings for lwp-request -t + + o referrer as alternative spelling for referer as suggested by tchrist. + + o More conservative selection of boundary for multipart messages + in &HTTP::Request::Common::POST. + + o LWP::MediaTypes can now export &read_media_types. + + o Spelling corrections from Ben Tilly <Ben_Tilly@trepp.com> + + + +1999-09-20 Gisle Aas <gisle@aas.no> + + o Release 5.45 + + o The LWP SSL support better explained. Documentation in README.SSL + and lwpcook.pod contributed by Marko Asplund <aspa@hip.fi>. + + o LWP::Protocol::https: Try loading IO::Socket::SSL if Net::SSL is + not available. + + o lwp-mirror -t option did not work. + + o defined(@ISA) eliminated. Patch by Nathan Torkington <gnat@frii.com> + + o LWP::Protocol::ftp: Protect against empty path_segments + + + +1999-06-25 Gisle Aas <gisle@aas.no> + + o Release 5.44 + + o We require URI-1.03, since this fix some query quoting stuff + that HTTP::Request::Common rely upon. + + o 'lwp-request -HUser-Agent:foo' can now be used to set this + header too. + + o Localize $/ to ensure standard record separator a few places + in HTTP::Cookies + + o LWP::UserAgent will now set the Range header in requests if + the $ua->max_size attribute is set. + + + +1999-05-09 Gisle Aas <gisle@aas.no> + + o Release 5.43 + + o New lwp-request command line option that allow you to put any + header into the request (-H). + + o New HTTP::Date because of Y2K-problems with the old one. + It refused to parse the ftp-listing (ls -l) dates missing year. + Additional entry point is parse_date(). This function avoid any + limitations caused by the time-representation (seconds since + epoch). + + o Y2K fix to t/base/cookies.t. Netscape's original cookie + example expires at 09-Nov-99. + + o Added another binmode() to LWP::Protocol::file as suggested + by Matt Sergeant <matt-news@sergeant.org> + + + +1999-03-20 Gisle Aas <gisle@aas.no> + + o Release 5.42 + + o MacOS patches from Paul J. Schinder <schinder@leprss.gsfc.nasa.gov> + + o Documentation patch from Michael A. Chase <mchase@ix.netcom.com> + + o PREREQ_PM patch from Andreas Koenig <andreas.koenig@anima.de> + + o LWP::Simple::head fix by Richard Chen <richard@lexitech.com> + + o "LWP fails with PerlEXE"-patch from Gurusamy Sarathy + + o Allow "." in HTTP header names. Patch by Marc Langheinrich + <marc@ccm.cl.nec.co.jp> + + o Fixed reference to $uri->netloc in lwp-request + + o Cute animation in lwp-download + + + +Mon Nov 19 1998 Gisle Aas <aas@sn.no> + + o Release 5.41 + + o HTTP::Cookies provide better Netscape cookies compliance. + Send back cookies to any port, and allow origin host name to + be specified as domain, and still be treated as a domain. + Patch from Andreas Gustafsson <gson@araneus.fi>. + + o HTTP::Cookies now ignore the Expires field in Set-Cookie, if the + date provided can't be parsed by HTTP::Date. + + o HTTP::Daemon will lowercase the hostname returned from + Sys::Hostname::hostname(). This avoid some test failures in + the test suite for people with upper- or mixed-cased hostnames. + + o LWP::Protocol::gopher: IO::Socket::INET ctor did not specify + Proto => 'tcp'. This made it less portable to older IO::Socket + versions. + + o No programs installed when you build the Makefile with + 'perl Makefile.PL LIB=/my/lib' + + o LWP bundle mention Digest::MD5 instead of MD5 + + o headers-auth.t test suite bug triggered by perl5.005_5x. + Patch by Albert Dvornik <bert@genscan.com> + + o The local/http.t test actually did try to unlink("."). This was + very confusing on systems where it succeed. + + + +Mon Oct 12 1998 Gisle Aas <aas@sn.no> + + o Release 5.40_01 + + o Unbundled URI::URL modules. You now have to install the + URI.pm module in order to get libwww-perl working. + + o Made library URI.pm compatible. Made all URI object instantiations + based on $HTTP::URI_CLASS variable. + + o New lwp-rget option: --referer. + Patch by INOUE Yoshinari <inoue@kusm.kyoto-u.ac.jp>. + + o One more binmode() to HTTP::Daemon as suggested by + Markus Laker <mlaker@contax.co.uk>. + + + +Tue Aug 4 1998 Gisle Aas <aas@sn.no> + + o Release 5.36 + + o The lwp-download script will call $ua->env_proxy now. + + o The lwp-request script allows content types (specified with the -c + option) with optional parameters like: multipart/mixed; boundary="--". + + o LWP::UserAgent will lowercase all authentication parameter names + before passing it to the authentication module. Previous releases + ignored parameters like; Realm="Foo" (because Realm contained + upper case letters). + + o LWP::Protocol::ftp test for If-Modified-Since was wrong. + + o How the $url->abs method works can now be configured with the global + variables $URI::URL::ABS_ALLOW_RELATIVE_SCHEME and + $URI::URL::ABS_REMOTE_LEADING_DOTS. + + o The anonymous password guesser for ftp URLs will now call the external + `whoami` program any more. Patch by Charles C. Fu <ccwf@bacchus.com>. + + o LWP::Protocol::http now allow dynamic requests without any + Content-Length specified when Content-Type is multipart/* + + o HTTP::Request::Common can now upload infinite files. + (Controlled by the $DYNAMIC_FILE_UPLOAD configuration variable.) + + + +Fri Jul 10 1998 Gisle Aas <aas@sn.no> + + o Release 5.35 + + o More lwp-rget patches from David D. Kilzer <ddkilzer@madison.dseg.ti.com>. + Adds the following new options: --iis, --keepext, --tolower + + o LWP::MediaTypes patches from MacEachern <dougm@pobox.com>. Adds new + functions: add_type(), add_encoding(), read_media_types() + + + +Tue Jul 7 1998 Gisle Aas <aas@sn.no> + + o Release 5.34 + + o LWP::Protocol::ftp now try to use the MDTM command to support + the Last-Modified response header as well as + If-Modified-Since in requests. Original and final patch by + Charles C. Fu <ccwf@bacchus.com> + + o $url->path_components will not escape "." any more. + + o WWW::RobotRules will now work for Mac text files too (lines delimited + by CR only). Patch by Olly Betts <olly@muscat.co.uk> + + o lwp-rget support <area ..> links too. + + + +Thu May 7 1998 Gisle Aas <aas@sn.no> + + o Release 5.33 + + o LWP::Simple::get() did try to handle too many of the 3xx + codes as redirect when it bypasses full LWP. + + o LWP::UserAgent->mirror will now use utime(2) to set the + file modification time corresponding to the Last-Modified + header. + + o LWP::Protocol::http will not modify the HTTP::Request that + it is processing. This avoids sticky Host header for + redirects. + + o URI::Heuristic and lwp-download documentation update. + + + +Wed Apr 15 1998 Gisle Aas <aas@sn.no> + + o Release 5.32 + + o Much improved HTTP::Daemon class. We now support persistent + connections. Changes include: + - $c->get_request can be told to return after reading and + parsing headers only. + - $c->reason (new method) + - $c->read_buffer (new method) + - $c->proto_ge (new method) + - $c->force_last_request (new method) + - $c->send_response now support CODE reference content + and will use chunked transfer encoding for HTTP/1.1 clients. + - expanded the documentation. + + + +Fri Apr 10 1998 Gisle Aas <aas@sn.no> + + o Release 5.31 + + o Makefile.PL now checks that HTML::HeadParser is present. + + o Updated HTTP::Cookies according to draft-ietf-http-state-man-mec-08.txt + It now supports the .local domain and value less 'port' attribute in + the Set-Cookie2 header. + + o HTTP::Headers update: + - $h->content_type now always return a defined value + - $h->header($field) will now concatenate multi-valued header + fields with "," as separator in scalar context. + + o HTTP::Request::Common update: + - used to destroy the content of the hash/array arguments + passed to its constructor functions. + - allow a hash reference to specify form-data content. + - you can override Content-Disposition for form-data now. + - set content-encoding for files if applicable + - default boundary string is now always "--000" + + o LWP::UserAgent will not follow more than 13 redirects + automatically. + + + +Wed Apr 1 1998 Gisle Aas <aas@sn.no> + + o Release 5.30 + + o Unbundled the following modules: + + * HTML-Parser (HTML::Parser, HTML::Entites, HTML::LinkExtor,...) + * HTML-Tree (HTML::Element, HTML::TreeBuilder,...) + * Font-AFM (Font::AFM, Font::Metrics::*) + * File-CounterFile + + o Simplified internal structure of HTTP::Headers. Hopefully, + nobody will notice. + + o New modules HTTP::Headers::Auth, HTTP::Headers::ETag that adds + additional convenience methods to the HTTP::Headers class. + + o Removed split_etag_list() from HTTP::Headers::Util, in the hope + that nobody had starting using it. + + + +Tue Mar 24 1998 Gisle Aas <aas@sn.no> + + o Release 5.22 + + o HTTP::Cookies made more compatible with Netscape cookies. Allow + the domain to match host, allow dots in the part of the hostname + not covered by domain. Don't quote the cookie value even when it + contains non-token chars. Based on patch from Kartik Subbarao + <subbarao@computer.org>. + + o Updated HTTP::Status to reflect <draft-ietf-http-v11-spec-rev-03>. + RC_MOVED_TEMPORARILY renamed to RC_FOUND. Added codes + RC_TEMPORARY_REDIRECT (307) and RC_EXPECTATION_FAILED (417). + Slightly more documentation too. + + o The split_header_words() function HTTP::Headers::Util could go + into infinite loop on some header values. Implemented split_etag_list() + too. Added more documentation and test script for this module. + + o LWP::Simple now switch to full LWP implementation even for systems + that force all environment keys to be upper case. Modification + suggested by Dale Couch <dcouch@training.orl.lmco.com>. + + o LWP::UserAgent allows redirects to a relative URL with scheme to be + made. Suggested by Chris W. Unger <cunger@cas.org>. + + o Applied dtd2pm.pl patches from <peterm@zeta.org.au>. It can now + extract information from the HTML40.dtd + + + +Thu Mar 12 1998 Gisle Aas <aas@sn.no> + + o Release 5.21 + + o lwp-rget patches from David D. Kilzer <ddkilzer@madison.dseg.ti.com> + (modified by Gisle). Support the --hier and the --auth options + and <frame>s. + + o File::CounterFile protect against bad $/ and $\ as suggested + by Frank Hoehne. + + o File::Listing used "next" when return was more appropriate. + Patch by erik@mediator.uni-c.dk. + + o HTML::Element support for multiple boolean attributes for a single + element. Patch by Philip Guenther. + + o Can set $HTTP::Headers::TRANSLATE_UNDERSCORE to FALSE value to + suppress tr/_/-/ of header keys. + + o LWP::Protocol::http will not initialize the Host header if it is + already set. + + o LWP::Protocol::http did not handle responses with no header lines + correctly. Patch by Daniel Buenzli <buenzli@rzu.unizh.ch> + + o $url->rel() handles path segments without leading "/" better. + + + +Fri Feb 13 1998 Gisle Aas <aas@sn.no> + + o Release 5.20 + + o Fixed the "500 Offset outside string" bug that affected perl + 5.004_03 and older version of Perl. + + o Fixed a documentation typo spotted by Michael Quaranta + <quaranta@vnet.IBM.COM> + + o HTTP::Date: Protect against croaking from timegm/timelocal. + + + +Mon Jan 26 1998 Gisle Aas <aas@sn.no> + + o Release 5.19 + + o HTML::Parser does not call $self->text() callback for empty text + any more. + + o LWP::Protocol::https was noisy when connections failed and the + script was running with '-w' (noise inherited from IO::Socket::INET) + + o $ua->use_alarm(BOOL) now gives a warning if running with -w + + + +Tue Jan 20 1998 Gisle Aas <aas@sn.no> + + o Developer release 5.18_05 + + o HTTPS support based on my Crypt-SSLeay module. The Net-SSLeay module + is not supported any more. + + o lwp-request documentation typo spotted Martijn Koster. + + o Removed superfluous \\ in the URI::Escape regex. This was also + spotted by Martijn. + + o File::Listing now handles timezones correctly. + + o Added $VERSION to modules that was missing it. + + o Added 'use strict' to several modules that was missing it. + + o LWP::Protocol::http now adds the Client-Peer header to responses and + has hooks for more callbacks. + + o LWP::Protocol::https adds Client-SSL-Cipher, Client-SSL-Cert-Subject + and Client-SSL-Cert-Issuer headers to the response. The requests can + also be made conditional based on the peer certificate using the + If-SSL-Cert-Subject header in requests. + + o HTML::Parse is back. (It was even used in lwpcook.pod) + + + +Wed Dec 17 1997 Gisle Aas <aas@sn.no> + + o Developer release 5.18_04 + + o Makefile.PL fix based on report from Pon Hwa Lin <koala@fragment.com> + + o lwp-request will now print the response code message with -s and -S + options. + + o Hide IO::Socket::INET noise when running under -w + + o Don't set 'Content-Length: 0' in HTTP requests. + + o LWP::Protocol::http now calls LWP::Debug::conns again + + + +Tue Dec 16 1997 Gisle Aas <aas@sn.no> + + o Developer release 5.18_03 + + o Got rid of alarms() and replaced LWP::Socket with IO::Socket::INET. + New protocol implementations for http, https, gopher, nntp. + $ua->use_alarm() is now a noop. + + o LWP::Protocol::ftp patch from Tony Finch <fanf@demon.net>. + + o Removed deprecated modules from the distribution; HTML::Parse, + LWP::Socket, LWP::SecureSocket, LWP::IO, LWP::TkIO. + + + +Fri Dec 12 1997 Gisle Aas <aas@sn.no> + + o Release 5.18 + + o HTTP authorization patches from Tony Finch <fanf@demon.net>. + Allows "user:pass@" in HTTP URLs. + + o HTML::Parser patch by Brian McCauley <B.A.McCauley@bham.ac.uk>. + Pass original text to end() method. + + o The HTML::Parser->netscape_buggy_comment method is deprecated. + Use HTML::Parser->strict_comment instead. The default value + has changed with the name. + + o Some HTML::Parser optimization tweaks. + + o New module named HTML::Filter + + o Updated HTTP::Headers to the latest HTTP spec. Added knowledge + about the "Trailer", "Expect", "TE", "Accept-Range" headers. + "Public" header is gone. + + o Added some more header convenience methods: if_unmodified_since, + content_language, and proxy_authorization methods. + + o HTTP::{Request,Response}->clone can handle subclasses now. + + o HTTP::Request->url() can now undefine the URL. + + o HTTP::{Request,Response}->as_string format looks more like + the HTTP protocol formatting now. Dashed lines above and + below is gone. + + o Documented HTTP::Response->status_line method + + o Compressed HTML::Response->error_as_HTML output + + o HTTP::Status updated to latest HTTP spec. Added + RC_REQUEST_RANGE_NOT_SATISFIABLE (416) + + + +Tue Dec 2 1997 Gisle Aas <aas@sn.no> + + o Release 5.17 + + o All authentication handling moved out of LWP::UserAgent and into + LWP::Authen::Basic and LWP::Authen::Digest. We now also support + PROXY_AUTHENTICATION_REQUIRED responses. + + o HTML::Formatter will always add a blank line for <br>. + + o Avoid use of uninitialized value in HTTP::Daemon. + + o HTTP::Date allows seconds when recognizing 'ls -l' dates. This + allows us to parse syslog time stamps. + + o HTTP::Request::Common::POST allows a hash reference as second + argument (in addition to an array reference). + + o LWP::Simple will initialize the $ua if it is exported. + + o Various documentation updates. + + + +Fri Nov 21 1997 Gisle Aas <aas@sn.no> + + o Release 5.16 + + o LWP::Simple::head() would die in array context because str2time + was not imported any more. + + o HTTP::Daemon->accept now takes an optional package argument like + IO::Socket->accept does. + + o Made HTTP::Request and HTTP::Response subclassable. + + o Added Proxy-Authorization example to lwpcook. + + + +Thu Nov 6 1997 Gisle Aas <aas@sn.no> + + o Release 5.15 + + o New module URI::Heuristic + + o The lwp-request script now use URI::Heuristic for it's URL arguments. + It means that 'lwp-request perl' will not get a file called "./perl" + but will fetch the page "http://www.perl.com" or something similar. + If you want to get the file you have to prefix it with "./". Full + URLs are never affected by this. + + o LWP::Simple::get() will bypass LWP for simple HTTP requests. This + should make it somewhat faster. + + o LWP::RobotUA has a new method called $ua->use_sleep() that + controls how niceness towards the servers are enforced. + Previously $ua->use_alarm() used to control this, but this did + not work well on Win32 systems. + + o URI::URL::rel() will handle URLs to a fragment within the same + document better. Initial patch from Nicolai Langfeldt + <janl@math.uio.no>. + + o HTML::Element don't consider </th>, </tr> and </td> optional any + more. I wonder how Netscape managed to not implement this + correctly all this time. + + o Added lots of modern tags to HTML::AsSubs. + + o HTTP::Request::Common will read uploaded files in binmode(). + This should be better for Win32 systems. Contributed by + <Steve_Kilbane@cegelecproj.co.uk>. + + + +Sun Oct 12 1997 Gisle Aas <aas@sn.no> + + o Release 5.14 + + o HTML::Formatter patches from Andreas Gustafsson <gson@araneus.fi>. + The formatters handling of whitespace is much better now. Thanks! + + o HTML::FormatText: can specify margins in the constructor. + + o URI::URL: the base will be absolutized internally now. + + o URI::URL will take advantage of void context provided by perl5.004. + This means that using $u->path and $u->query should be safer now. + + o URI::URL->print_on defaults to STDERR now (used to be STDOUT). + + o URI::URL: removed version 3 compatibility stuff ($COMPAT_VER_3). + + o $ua->mirror should work better on dosish systems (can not + rename when target file exists). + + o Typo in lwp-download prevented it from compiling. + + o Some minor documentations typos corrected. + + + +Sat Sep 20 1997 Gisle Aas <aas@sn.no> + + o Release 5.13 + + o Brand new module called HTTP::Cookies. It stores cookies + (Set-Cookie and Set-Cookie2 headers) from responses and can + create appropriate Cookie headers for requests. It can also + share cookie files with Netscape. + + o LWP::UserAgent now support the cookie_jar() attribute. When + set to an HTTP::Cookies object, it will automatically manage + the cookies sent to the servers. Off by default. + + o New header utility functions in HTTP::Headers::Util. + + o Win32 and OS/2 patches for the lwp-xxx utilities. Turn on + binary mode by default (option to turn it off), avoid modifying $0, + and don't be confused about suffixes in the script names. + Contributed by Ben Coleman <bcoleman@mindspring.com> + + o OpenVMS patch for Font:AFM by Brad Hughes <brad@tmc.naecker.com> + + + +Fri Sep 5 1997 Gisle Aas <aas@sn.no> + + o Release 5.12 + + o decode_entities() would sometimes introduce ";" after + things that looked like they were entities. + + o HTML::LinkExtor knows about <applet code="..."> + + o Patch from Gary Shea <shea@gtsdesign.com> that makes the + tests work even if perl is not called "perl" + + o HTTP::Date handles 12:00PM correctly now. Patch from + William York <william@mathworks.com> + + o HTTP::Request::Common don't quote the boundary string for + multipart/form-data messages any more. + + o Font::AFM works for encodings where .notdef is defined to + have some size. Documentation and efficiency update. + + + +Wed Aug 6 1997 Gisle Aas <aas@sn.no> + + o Release 5.11 + + o Perl version 5.004 is now required for libwww-perl. + + o Win32 patches from Gurusamy Sarathy <gsar@engin.umich.edu>. + Now passes all tests on that platform. + + o HTTPS support contributed by Josh Kronengold <mneme@mcny.com> + + o Support hex entities ÿ HTML::Entities::(en|de)code only + modify their argument in void context. + + o Fixed formatter bug with <font> tags which did not specify size. + + o Better HTML::HeadParser documentation + + o Fixed HTML::LinkExtor documentation typo spotted by Martijn. + + o HTTP::Request::Common now use CRLF for multipart/form-data + + + +Fri Jun 20 1997 Gisle Aas <aas@sn.no> + + o Release 5.10 + + o Make '+' a reserved URL character. Decode unescaped '+' as + space in $url->query_form(). + + o Use $Config{d_alarm} to determine default for $ua->use_alarm() + + + +Tue Jun 10 1997 Gisle Aas <aas@sn.no> + + o Release 5.09 + + o Removed the MIME modules from the distribution. They are distributed + separately now. + + o Added a new module called HTTP::Request::Common + + o Improved HTTP::Status documentation. It is now also possible + to import the is_client_error/is_server_error functions. + + o LWP::MediaTypes::guess_media_type() can now take an optional + HTTP::Header parameter. + + o LWP::Protocol ensures that scheme is legal as module name. + + o LWP::Protocol::http is not as strict when trying to verify the + method name. It now also accepts responses without a message + on the status line. + + o WWW::RobotRules::AnyDBM_File: Some DBMs fail to allow multiple + opens of the same file. Patch from Mark James <jamesm@skate.org> + + o Created Bundle::LWP + + + +Sat Apr 5 1997 Gisle Aas <aas@sn.no> + + o Release 5.08 + + o Made libwww-perl warning compatible with upcoming perl5.004beta2 + (aka 5.003_98) + + o encode_base64() did not work properly if pos() of the string to + encode was different from 0. + + o HTML::Parser was confused about "</" when it did not start an end tag. + + o HTML::FormatPS will provide ISOLatin1Encoding in its output. + + o Calling HTML::LinkExtor->links will clear out old links. + + o url()->rel($base) would ignore the $base argument. + + o Don't croak() when setting url()->path(). + + + +Tue Feb 11 1997 Gisle Aas <aas@sn.no> + + o Release 5.07 + + o Can limit the size of the response content with $ua->max_size() + + o Added time2iso() functions to HTTP::Date. + + o Made LWP::Protocol::http more portable to the MacPerl. /./ match + different things on MacPerl. + + + +Mon Jan 27 1997 Gisle Aas <aas@sn.no> + + o Release 5.06 + + o URI::URL is now compatible with perl5.004 overloading. + + o HTML::HeadParser makes X-Meta-Name headers for <meta> elements + that does not specify an 'http-equiv' attribute. + + o URI::URL::ftp does not die if Net::Domain is not installed and + you ask for an anonymous username or password. + + o WWW::RobotRules: The robots.txt parser did not ignore comment lines + as it should. + + o LWP::Protocol::http is more forgiving towards servers that return + bad responses. + + o Allow "?" before search string in gopher URLs. + + o LWP::Protocol::file did not escape funny filenames when generating + HTML directory listings. + + o LWP::Protocol::ftp now gets the Content-Encoding correct. 'CODE' + content in PUT requests also work now. + + o Relative locations in redirects did not work with URI::URL::strict. + + o OS/2 portability patches from Ilya Zakharevich + + o LWP::Authen::* patch from Doug MacEachern + + o Support experimental data:- URLs + + o Some tests (those using HTTP::Daemon) now die more gracefully if + IO::* modules is not installed. + + + +Wed Dec 4 1996 Gisle Aas <aas@sn.no> + + o Release 5.05 + + o LWP::UserAgent::simple_request: local($SIG{__DIE__}) protects us + against user defined die handlers. + + o Use Net::Domain (instead of Sys::Hostname) to determine FQDN. It + is used by URI::URL when it determines anonymous ftp login address. + + o lwp-download: New program in the bin directory + + o HTML::Parser: Allow '_' in attribute names. This makes it possible + to parse Netscape's bookmarks.html file. + + o HTTP::Daemon: Fixed chunked transfer encoding and multipart content + in get_request(). Support HTTP/0.9 clients. + + o Don't clobber regex variables when HTTP::Message delegates methods + to the header. + + o Base64::decode_base64 now checks that the length input string to + decode is a multiple of 4. + + o t/robot/rules-dbm.t clean up better and will use AnyDBM for dumping + + o File::CounterFile: $/ strikes again by Andreas König + + o File::Listing updates from William York <william@mathworks.com>. We + can now parse the MS-Windows ftp server listings. + + o HTTP::Date now supports the MS-Windows 'dir' date format. Patch by + William York. + + o LWP::MediaTypes::media_suffix will return first type in scalar context. + + + +Tue Oct 22 1996 Gisle Aas <aas@sn.no> + + o Release 5.04 + + o Added HTTP::Daemon. This is a HTTP/1.1 server class. This means + that libwww-perl no longer is a client library only. The HTTP::Daemon + is also used in the new test suite. + + o HTTP::Message support the protocol() method. Used by HTTP::Daemon. + + o HTTP::Response can be constructed with a header and content as + argument. + + o Typo corrections in the documentation. + + o File::Listing::parse_dir accepts "GMT" as timezone now. + + o HTML::Parser will call the start() method with two new parameters; + $attrseq, $origtext. + + o Integrated HTML::FormatPS patches from + Jim Stern <jstern@world.northgrum.com> + + o Class modules don't inherit from AutoLoader any more. They just + import the AUTOLOAD method. + + o LWP::Protocol will untaints scheme before loading protocol module. + + o Digest does not send "opaque" if it was not present in the request. + The "Extension" header is not returned any more. + + o New method: $url->crack that will return a list of the various + elements in a URI::URL. + + o WWW::RobotRules did not use the agent() method when determining + who we are. This affected WWW::RobotRules::AnyDBM_File parsing + for robots.txt. Visit count did not increment for + WWW::RobotRules::InCore. + + + +Tue Oct 1 1996 Gisle Aas <aas@sn.no> + + o Release 5.03 + + o Hakan Ardo's persistent robot rules is now part of the standard + distribution. This is still experimental and might change in the + future. It includes the new WWW::RobotRules::AnyDBM_File class + and updates to LWP::RobotUA. + + o HTML::Parser now supports buggy Netscape comment parsing. Enable + it by calling $p->netscape_buggy_comment(1). The previous version + of the parser could also (under very unlucky and unlikely + circumstances) call the $self->comment() method several times for + the same comment text. + + o HTML::Parser: Use new $p->eof to signal end of document instead of + $p->parse(undef). + + o HTML::Element::starttag() is now smarter about which quotes it + use around attribute values. + + o New HTTP::Response methods: current_age(), freshness_lifetime(), + is_fresh(), fresh_until(). + + o HTTP::Message: New method ($mess->content_ref) which will return + a reference to the current content. + + o URI::URL: New method ($url->rel) which does the opposite of abs(). + Example: url("http://host/a/b/c", "http://host/c/d/e")->rel would + return url("../../a/b/c", "http://host/c/d/e"). This was + suggested by Nicolai Langfeldt <janl@ifi.uio.no> + + o URI::URL: $url->query_form can now take array references as value + specification. For instance: $url->query_form(foo => ['bar', 'baz'] + + o Avoid '"my" variable $var masks earlier declaration in same scope' + warnings in perl5.003_05. + + + +Wed Sep 11 1996 Gisle Aas <aas@sn.no> + + o Release 5.02 + + o lwp-rget: Initialize proxy settings from environment + + o HTML::Entities::encode_entities: Don't encode $ and % + + o HTML::LinkExtor::links: Now works when no links were found. + + o HTTP::Headers::as_string: Allow \n in header value + + + +Tue Aug 1 1996 Gisle Aas <aas@sn.no> + + o Release 5.01. + + o Updated ftp protocol module to be compatible with Net::FTP + version 2.00 (the version found in libnet-1.00) + + o New HTML parser module called HTML::LinkExtor + + o Various documentation typo corrections. Most of these contributed + by Bob Dalgleish. + + o HTML::HeadParser updates 'Content-Base' instead of 'Base'. It also + updates the 'Link' header based on <link ...> + + o HTTP::Headers and HTTP::Status updated according to + draft-ietf-http-v11-spec-06 + + o HTTP::Headers can now use "_" as alternative to "-" in field names. + + o HTTP::Response::base now looks for 'Content-Base', + 'Content-Location' and 'Base' headers. + + o Avoid warning in LWP::MediaTypes if $ENV{HOME} is not defined. + + o The new $ua->parse_head() method can be used to turn off + automatic initialization of response headers from the <HEAD> + section of HTML documents. + + o Added eq() method for URI::URL objects + + o The HTML::Formatter recovers even if a handle method is not defined + for all tags found during traversal + + + +Sun May 26 1996 Gisle Aas <aas@sn.no> + + o Release 5.00. + + o LWP::Simple::head() now return something useful in scalar context. + + o Rewritten the HTML::Parse stuff. Introduced the HTML::Parser class + that will tokenize a HTML document. The rest of the old + HTML::Parse functionality has moved to HTML::TreeBuilder class. + Note, that the HTML stuff is still alpha. + + o Implemented HTML::HeadParser. This is a lightweight parser for + the <HEAD> section of a HTML document. + + o HTML::Element had problem with presenting things like <foo + bar="bar">. + + o HTML::Entities: Included additional ISO-8859/1 entities listed in + RFC1866. + + o HTML::AsSubs exported 'header' instead of 'head' + + o We know about a few more of the HTML 3.2 element. + + o HTTP::Date had problems with years before 1970, because Time::Local + went into an infinite loop. Check for this. + + o Added $header->title method. + + o Made $header->authorization_basic return "uname:passwd" in scalar + context + + o LWP::Protocol::collect() will let the HTML::HeadParser look at the + document content as it arrives. This will initialize headers from + elements like <base href="...">, <title>...</title> and <meta + http-equiv="..." ...>. + + o Simplified $response->base implementation, because we don't have + to look into the content any more. + + o Added -quiet option to lwp-rget + + o Typo fixes and some documentation additions. + + + +Thu May 9 1996 Gisle Aas <aas@sn.no> + + o Release 5b13 + + o Made URI::URL::strict(0) the default. I got tired of all this + eval { } stuff just to recover. The URI::URL::strict'ness also + determine if calling some standard method that happens to be + illegal for some protocol scheme will croak or just be ignored. + + o Ensure correct $INPUT_RECORD_SEPARATOR and $OUTPUT_RECORD_SEPARATOR + at places where we <> or print. + + o Always localize $_ before any 'while(<FILE>) {}'-loops + + o Implemented $protocol->collect_once() and simplified several + of the protocol implementations by using it. + + o The HTML parser used to get it wrong if you were unlucky about the + breakup of the text. An example of broken behaviour was this: + + $html = parse_html "<!-- A comment -"; + $html = parse_html "-> and some text."; + + o The HTML parser does not ignore whitespace-only text any more. + + o HTML::Parse warnings are now optional and turned off by default. + + o New start for $html->as_HTML(). + + o Fixed some typos + + + +Wed Apr 24 1996 Gisle Aas <aas@sn.no> + + o Release 5b12 + + o New utility program called 'lwp-rget'. + + o $response->base was broken for HTML documents + + o New fancy LWP::Debug import() method. Can now turn on debugging with + "use LWP::Debug '+';" + + o Trap exceptions (die) from the response callback routine + + o The RobotUA now requires an e-mail address of the person responsible + for the robot. + + o New $ua->from() method. + + o Support for gopher Index-Search (gopher type '7' requests). + Contributed by Harry Bochner <bochner@das.harvard.edu> + + o Cleaned up white-space usage in the source. + + + +Wed Apr 3 1996 Gisle Aas <aas@sn.no> + + o Release 5b11 + + o Implemented a NNTP protocol module. The library can now fetch and + post news articles. + + o More documentation + + o Don't look at the URI header for redirects + + o New $res->base() method for HTTP::Responses + + o Graham Barr's patch to File::Listing to make it more OO internally + + o content_type() return value is canonicalized + + o $ua->request() does not die on bad URLs any more + + o LWP::MediaTypes merge all media.types files that if finds + + o FTP request with content to file or callback did not work + + o The value of HTTP Host: header is now $url->netloc; + + o The URI::URL constructor now accept URLs wrapped up in "<>" + + o $url->abs() now has a second optional argument that makes it accept + that relative URLs can have scheme, i.e. treat "http:img.gif" as a + relative URL. + + o Added prototypes to the HTTP::Status::is_xxx() functions + + o Added prototypes to the MIME:: encoding/decoding functions + + o Always return scheme for mailto and news URLs (as_string) + + o RobotRules patches from Henry A Rowley. + + o More tests + + o <SMALL> and <BIG> again + + + +Thu Mar 14 1996 Gisle Aas <aas@sn.no> + + o Release 5b10 + + o GET ftp://host/doc was never successful for normal files. + + o LWP::Socket: read_until() did not notice EOF condition. I.e. if + a HTTP server closed the connection before any output was generated, + the we continued to read 0 bytes in a busy loop until the alarm() + killed us. + + o Added support for Digest Access Authentication. Contributed by + Doug MacEachern <dougm@osf.org>. + + o Makefile.PL: check for MD5 library + + o No longer print message content in HTTP::Response::error_as_HTML() + + o Access to "file:/path" gave warning when the environment variable + no_proxy was set. + + o The http-post test sends a Content-Type header. Some servers hang + if this header is missing. + + o HTML::Parse: + - allow <SMALL> and <BIG> tags + - allow empty attribute values + + + +Tue Mar 5 1996 Gisle Aas <aas@sn.no> + + o Release 5b9 + + o Started to write on the libwww-perl cookbook (lwpcook.pod) + + o The URI::URL module now exports the function url(). This is an + alternative (easy to use) constructor function. + + o Expanding relative file URLs starting with "#" did not work. + + o Fixed autoloaded DESTROY problem by adding empty DESTROY routine + to URI::URL. + + o Does not try generate password for ftp-URLs unless the username is + "anonymous" or "ftp" + + o The LWP::Simple user agent proxy settings are initialized from + the proxy environment variables. + + o LWP::Protocol::ftp: Use the Net::FTP library to access ftp servers. + Convert directories to HTML on request (Accept: text/html). + + o New module HTTP::Negotiate + + o New module File::Listing + + o HTTP::Date::str2time can parse a few more formats, like the 'ls -l' + format and ISO 8601. The function now also takes an optional second + parameter which specify a default time zone. + + o Added prototypes to the HTTP::Date functions. + + o The library adds a timestamp to responses ("Client-Date") + + o HTTP::Status: Updated to proposed HTTP/1.1 + + o HTTP::Headers: Updated to proposed HTTP/1.1 + + o LWP::Protocol::http: Updated to HTTP/1.1 methods + + o Took out stringify overloading in HTML::Element. + + + +Mon Feb 26 1996 Gisle Aas <aas@sn.no> + + o Release 5b8 + + o Renamed functions using thisStyleOfNames to this_style_of_names. + Provided a script called 'update_from_5b7' + + o Renamed the 'request' and 'mirror' scripts to 'lwp-request' and + 'lwp-mirror'. The GET, POST, HEAD aliases for 'lwp-request' are + the same. + + o Implemented LWP::RobotUA + + o Class name for RobotRules did not match the file name + + o URI::URL + - perl5.002gamma is required (because use vars). + - The leading slash in now part of the path if it is present. + - More documentation + - Use AutoLoader to speed things up. + - New class URI::URL::_login and made telnet, rlogin, tn3270 + subclasses from this one. + - URI::URL::wais is better supported. + - URI::URL::news is better supported. + - New URI::URL::mailto methods: user/host + + o HTTP::Date::time2str now works correctly with '0' as argument + + o HTTP::Message delegates unknown methods to the headers. + + o HTTP::Request::uri is an alias for HTTP::Request::url. Can set + the URL to undef. + + o Added convenience methods to HTTP::Headers for easy access to + frequently used headers. + + o Simplified LWP::Debug + + o Use standard AutoLoader for LWP::IO functions. + + o Played with the profiler (Devel::DProf) and made some things + go quicker. + + o Included the File::CounterFile module. Excluded Mail::Cap module + as it is also part of the MailTools package. + + + +Mon Feb 5 1996 Gisle Aas <aas@sn.no> + + o Release 5b7 + + o Perl5.002 is required now + + o Rewrite of the URI::URL module (version 4.00) + - escaping/unsafe stuff redone (and removed) + - URI::URL::_generic moved out of URL.pm + - netloc, path, params, query is now stored internally in escaped form + - new methods for URI::URL::_generic are: + epath + eparams + equery + path_components + absolute_path + - new methods for URI::URL::http are: + keywords + query_form + - new methods for URI::URL::file are: + newlocal + local_path + unix_path + dos_path + mac_path + vms_path + + o URI::Escape now semi-compile regular expressions (by evaling an + anonymous sub). Same technique is also used in HTML::Entities. + + o HTTP::Date parser rewritten using regular expressions. + + o HTTP::Headers->as_string() now croaks if any field values + contain newline. + + o HTTP::Status constants use empty prototype. + + o Font metrics moved to a new subdirectory (lib/Font/Metrics) + + o Don't use the VERSION script any more (even if it was very clever) + + o HTML::Entities will now export the names decode_entities() and + encode_entities(). + + o Andreas Koenig's black patch to HTML::Element. + + o The HTML::Formatter now knows about <menu> and <dir> tags + + o The construct "defined ref($arg)" did not work on perl5.002 + because ref now always return something defined. + + o LWP::UserAgent sends by default a 'User-Agent' header. + + o LWP::Simple sends 'User-Agent' header to servers. + + o Updated the LWP::Socket module to use the new Socket.pm interface. + + o LWP::Protocol::http sends the new HTTP/1.1 'Host' header. + + o LWP::Protocol::file use $url->local_path to get a file to open. + It also inserts a <BASE> tag in directories instead of a redirect. + + o MIME::Base64 routines can be called as MIME::Base64::encode() and + MIME::Base64::decode(). Same kind of thing for QuotedPrint. + + + +Mon Nov 6 1995 Gisle Aas <aas@oslonett.no> + + o Release 5b6 + + o Installation should work better for those that still runs + perl4 as 'perl'. The mirror script is not installed by + default. + + o LWP::Socket::_getaddress() Numerical IP addresses did not work. + + o LWP::Socket::pushback() did not work. This also avoids the bad + pp_select() core dump from perl. + + o LWP::IO now also selects on socket exceptions. + + o HTML::Parse: Ignore <!DOCTYPE ...> regardless for case. Some + bad insertElement() calls made infinite loops. + + o The uri.t test works better for places where /tmp is a sym-link. + + + +Sat Sep 16 1995 Gisle Aas <aas@oslonett.no> + + o Release 5b5 + + o newlocal URI::URL does not put "//localhost" into the URLs any + longer. + + o The request program: New -o option to reformat the HTML code + New -C option to provide credentials on the command line. + The -b option never worked. + + o LWP::Protocol::file now returns redirect for access to directories + where the trailing slash is missing. + + + +Thu Sep 14 1995 Gisle Aas <aas@oslonett.no> + + o Speedups and bug fixes in the HTML parser. The parser now + understands some more deprecated tags (like <xmp> and <listing>). + + o HTML::Elements are now stringified using perl overloading. + The interface to the traverse() callback has changed. + + o Implemented HTML formatters for plain text and Postscript. + + o Added lib/Font stuff to support the Postscript formatter. + + o Inspired by Tim Bunce, I implemented the HTML::AsSubs module. + Don't know if it is really useful. + + o The local/get test does not use /etc/passwd any more. + + + +Thu Sep 7 1995 Gisle Aas <aas@oslonett.no> + + o Changed package name to libwww-perl-5xx + + o Made LWP::Protocol::ftp actually transfer data + + o Implemented methods for LWP::Socket to act as a server: + bind(), listen(), accept(), getsockname(), host(), port() + + + +Wed Sep 6 1995 Gisle Aas <aas@oslonett.no> + + o Release 0.04 + + o Implemented modules to parse HTML. + + + +Mon Sep 4 1995 Gisle Aas <aas@oslonett.no> + + o Implemented Mail::Cap which will become part of the MailTools + package. + + o Moved Base64 to MIME::Base64. Reimplemented MIME::Base64 by using + [un]pack("u",...) Implemented LWP::MIME::QuotedPrint for + completeness' sake. Routine names has changed as suggested by Tim + Bunce. + + o LWP::MediaType reads default types from media.types file. + guessMediaType() now also returns encodings. New function mediaSuffix() + + o Pass $url to $ua->getBasicCredentials(). This also fixes security + hole with the old implementation of getBasicCredentials(). + + o LWP::Protocol::file now sets Content-Encoding headers + + o Allow request content to be provided by a callback routine. + + o Fix bug that prevented response callback to work. The first parameter + (data) is no longer a reference, because $_[0] is already a reference. + Don't callback unless successful response. Callbacks during redirects + was confusing. + + o URI::URL. Remove port from netloc if it is the default port. + Don't use anything, just require. + + o HTTP::Message->addContent() does not need a reference parameter. + + o LWP::Socket->open() has been renamed top LWP::Socket->connect() + LWP::Socket->close has gone. Implemented new method LWP::Socket->read() + that returns as fast as it has some data to offer. Implemented + LWP::Socket->pushback(). + + o Does not die in LWP::UserAgent->request() + + o LWP::Socket now use LWP::IO for low level IO + + o Implemented LWP::TkIO as a replacement module for LWP::IO when using Tk. + + + +Thu Aug 17 1995 Gisle Aas <aas@oslonett.no> + + o $ua->redirectOK() for checking redirects + + o reorganized tests in the "t" directory. + + + +Fri Aug 11 1995 Gisle Aas <aas@oslonett.no> + + o Release 0.03 + + o Included RobotRules.pm from Martijn Koster + + + +Thu Aug 10 1995 Gisle Aas <aas@oslonett.no> + + o New module URI::Escape (URI::URL use this module for default + escaping) that provide the uri_escape() and uri_unescape() + functions. + + o Setting $url->scheme now changes the class of the object. + + o Made $httpurl->user() and $httpurl->password() illegal. + Likewise for other URL schemes. + + + + +Wed Aug 9 1995 Gisle Aas <aas@oslonett.no> + + o Reorganisation as discussed on <libwww-perl@ics.uci.edu> + LWP::Date --> HTTP::Date + LWP::MIMEheader --> HTTP::Headers + LWP::StatusCode --> HTTP::Status + LWP::Message --> HTTP::Message + LWP::Request --> HTTP::Request + LWP::Response --> HTTP::Response + LWP::MIMEtypes --> LWP::MediaTypes + + o HTTP::Date parses ctime format with missing timezone as suggested + by Roy Fielding <fielding@beach.w3.org> + + o HTTP::Status and LWP::MediaTypes exports their functions by default. + + o Splitted up the URI::URL module. Schemes are implemented by separate + files that are autoloaded when used. Self test moved to "t/uri.t". + + + +Mon Aug 7 1995 Gisle Aas <aas@oslonett.no> + + o Applied patch from Marc Hedlund <hedlund@best.com> + - Update the @header_order according to the August 3rd draft. + - Added Response Header fields: 'Location', 'Public', 'Retry-After', + 'Server', and 'WWW-Authenticate'. + - Moved 'unknown header' handling from &scan to &header. The old + implementation was forcing all unknown header-words to begin with + an uppercase (as it should be), but disallowed other uppercase + letters. + - updates the status code messages under the August + 3rd HTTP/1.0 draft. '203' became 'Non-Authoritative Information', + '303' became 'See Other', and a new code, + '411 Authorization Refused', was added. + + o Can remove multiple headers with single removeHeader() call in MIMEheader. + + o Can assign multiple field/value pairs in header() method of MIMEheader. + + o A field with multiple values is printed as separate values in + MIMEheader::as_string(). + + o LWP::Response contain new attributes: previous() and request(). These + attributes are updated by the UserAgent. + + o Appended \n to some die statements in Socket so that line numbers are + suppressed in error messages. + + o Made UserAgent::clone work for reference members + + o Check for redirect loops and multiple authorization failures by + examination of the response chain. + + o Use "\015\012" instead of "\r\n" in protocol modules. Some systems + define \r and \n differently. + + o request program can now handle documents that needs authorization by + prompting the user for username/password. Added new -S option to print + request/response chain. + + + +Tue Jul 25 1995 Gisle Aas <aas@oslonett.no> + + o Release 0.02 + + o Included URI::URL in the release + + + +Mon Jul 24 1995 Gisle Aas <aas@oslonett.no> + + o Incorporated Makemake.PL and VERSION from Andreas Koenig <koenig@mind.de> + As a result of this the following things have changed: + - programs in "bin" are extracted from .PL-files + - reintroduced "lib" + - "test" has been renamed as "t" + - test programs in "t" has been made Test::Harness compatible + - we now have a MANIFEST file + - no more need fro update_version, make-dist, lwp-install + + o Renamed bin/get to bin/request. Links to it are now all upper case. + + o Proxy support in bin/request (provided by Martijn Koster) + + o UserAgent can now load proxy settings from environment + + o LWP::Protocol::ftp is under way but not really finished + + + +Tue Jul 18 1995 Gisle Aas <aas@oslonett.no> + + o Implemented LWP::Protocol::gopher + + o Implemented LWP::Protocol::mailto + + o Fixed proxy typo + + + +Mon Jul 17 1995 Gisle Aas <aas@oslonett.no> + + o Made release 0.01 + + + +Mon Jul 17 1995 Gisle Aas <aas@oslonett.no> + + o Don't loose first line of HTTP/0.9 requests + + o LWP::Socket use syswrite() for writing + + o Added RC_* documentation to LWP::StatusCode + + o LWP::Date now use hash to look up month numbers + + o Added -f option to "get" + + o Untabify + + o Added a "TODO" list + + o Fixed various typos + + + +Fri Jul 14 1995 Gisle Aas <aas@oslonett.no> + + o Reorganized directories. Moved LWP.pm up. Moved file.pm and http.pm + into the LWP::Protocol directory. Moved LWP out of the lib directory + and removed lib. + + o Implemented the "get" and "mirror" scripts in the "bin" directory. + + o Implemented "install-lwp", "update_version" and "make-dist". The library + version number is found in the VERSION file. + + o Always adds 1900 to the year in LWP::Date + + o In LWP::MIMEheader: Implemented clone(), removeHeader() and scan() + methods. Reimplemented asString. Removed asMIME(). Moved "Good + Practice" into this file, and reimplemented it. + + o Moved "header" and "content" into LWP::Message class. This change made + LWP::Request and LWP::Response much simpler. Made clone() method + actually work. + + o LWP::Protocol::implementor does not die if it cannot load package. + + o Moved UserAgent convenience methods into LWP::Simple. Made LWP::Simple + export LWP::StatusCode symbols and functions. + + o Implemented $ua->isProtocolSupported($scheme) method. + + o Nicer directory listing in LWP::Protocol::file.pm + + o Rely on URI::URL 3.00 behaviour for $url->full_path + + o Library version number now in LWP.pm. You should be able to say + "use LWP 1.0;" if you need at least this version. + + o Various cleanups and arranging layout as I like it. Use fooBar-style + (as opposed to foo_bar style) everywhere. This means that as_string() + now is called asString(). + + o Added more documentation. + + + +Wed Jun 14 1995 Gisle Aas <aas@oslonett.no> + + o Removed lot of redundant & before function calls. + + o $this --> $self + + o &collector passes content as a reference, don't want to copy so much + + o parameterlist to collect callback has been rearranged + + o Response::addContent gets a reference to the data + + o Added some callback documentation to UserAgent.pm + + o Protocol::file now uses the collector + + o Introduced LWP::Simple + + + +Sun Jun 11 1995 Martijn Koster <m.koster@nexor.co.uk> + + o Added basic authentication support + + o Added mirroring of single documents + + o Change Protocol construction from whacky URL.pm (constructor returns + subclass) to more normal C++'ish way. + + + +Wed June 7 1995 Martijn Koster <m.koster@nexor.co.uk> + + o Minor cleanups from printed code inspection + + + +Wed May 24 1995 Martijn Koster <m.koster@nexor.co.uk> + + o Added redirection resolution + + o Added optional autoloading of protocols + + + +Tue May 23 1995 Martijn Koster <m.koster@nexor.co.uk> + + o Separated socket stuff into separate module + + o Added HTTP proxy support + + o Made alarm handling optional + + o Added a LWP::Message for completeness sake + + o Added LWP::MemberMixin to reduce code duplication + + o Cosmetic changes to LWP::Date + + o Renamed LWP::Error to LWP::StatusCode + + o Renamed LWP::MIME to LWP::MIMEtype + + o Changed the tests to cope with all this + +It's getting there... + + + +Mon May 22 1995 Martijn Koster <m.koster@nexor.co.uk> + + o Changed the socket reading to use sysread. This will have to go + into a module of its own. + + + +Thu 18 May 1995 Martijn Koster <m.koster@nexor.co.uk> + + o Mentioned on libwww-perl that I had changed the classes around lots. diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..d7886e6 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,70 @@ +AUTHORS Who made this +bin/lwp-download Writes bin/lwp-download script +bin/lwp-dump Writes bin/lwp-dump script +bin/lwp-mirror Writes bin/lwp-mirror script +bin/lwp-request Writes bin/lwp-request script +Changes History of this package +lib/LWP.pm Includes what you need +lib/LWP/Authen/Basic.pm Basic authentication scheme +lib/LWP/Authen/Digest.pm Digest authentication scheme +lib/LWP/Authen/Ntlm.pm NTLM authentication (Microsoft) +lib/LWP/ConnCache.pm Connection cache +lib/LWP/Debug.pm Debugging support +lib/LWP/DebugFile.pm Send debug output to a file +lib/LWP/MemberMixin.pm Helps you access %$self +lib/LWP/Protocol.pm Virtual base class for LWP protocols +lib/LWP/Protocol/cpan.pm Access to cpan URLs +lib/LWP/Protocol/data.pm Access to data URLs +lib/LWP/Protocol/file.pm Access local files +lib/LWP/Protocol/ftp.pm Access with the FTP protocol +lib/LWP/Protocol/GHTTP.pm Alternative HTTP protocol handler +lib/LWP/Protocol/gopher.pm Access with the Gopher protocol +lib/LWP/Protocol/http.pm Access with HTTP/1.1 protocol +lib/LWP/Protocol/loopback.pm Returns request (like HTTP TRACE) +lib/LWP/Protocol/mailto.pm Allows you to POST mail using sendmail +lib/LWP/Protocol/nntp.pm Handles access to news: and nntp: URLs +lib/LWP/Protocol/nogo.pm Denies all requests. +lib/LWP/RobotUA.pm Easy creation of conforming robots +lib/LWP/Simple.pm Procedural LWP interface +lib/LWP/UserAgent.pm A WWW UserAgent class +lwpcook.pod Libwww-perl examples +lwptut.pod Libwww-perl tutorial +Makefile.PL Makefile generator +MANIFEST This file +README Get you started with this package +README.SSL When you need SSL support +t/base/protocols.t Test protocol methods of LWP::UserAgent +t/base/proxy.t +t/base/ua.t Basic LWP::UserAgent tests +t/live/apache-http10.t +t/live/jigsaw/auth-b.t +t/live/jigsaw/auth-d.t +t/live/jigsaw/chunk.t +t/live/jigsaw/md5-get.t +t/live/jigsaw/md5.t +t/live/jigsaw/neg-get.t +t/live/jigsaw/neg.t +t/live/jigsaw/te.t +t/live/online.t +t/local/autoload-get.t +t/local/autoload.t Test autoloading of LWP::Protocol modules +t/local/get.t Try to get a local file +t/local/http.t Test http to local server +t/local/protosub.t Test with other protocol module +t/net/cgi-bin/moved +t/net/cgi-bin/nph-slowdata +t/net/cgi-bin/slowread +t/net/cgi-bin/test +t/net/cgi-bin/timeout +t/net/config.pl.dist Suggested configuration for net tests +t/net/http-get.t +t/net/http-post.t +t/net/http-timeout.t +t/net/mirror.t +t/net/moved.t +t/net/proxy.t +t/robot/ua-get.t +t/robot/ua.t Test LWP::RobotUA +talk-to-ourself Are we able to run tests talk HTTP to local server? +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..dac798b --- /dev/null +++ b/META.json @@ -0,0 +1,90 @@ +{ + "abstract" : "The World-Wide Web library for Perl", + "author" : [ + "Gisle Aas <gisle@activestate.com>" + ], + "dynamic_config" : 0, + "generated_by" : "ExtUtils::MakeMaker version 7.0512, CPAN::Meta::Converter version 2.143240", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "libwww-perl", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Digest::MD5" : "0", + "Encode" : "2.12", + "Encode::Locale" : "0", + "File::Listing" : "6", + "HTML::Entities" : "0", + "HTML::HeadParser" : "0", + "HTTP::Cookies" : "6", + "HTTP::Daemon" : "6", + "HTTP::Date" : "6", + "HTTP::Negotiate" : "6", + "HTTP::Request" : "6", + "HTTP::Request::Common" : "6", + "HTTP::Response" : "6", + "HTTP::Status" : "6", + "IO::Select" : "0", + "IO::Socket" : "0", + "LWP::MediaTypes" : "6", + "MIME::Base64" : "2.1", + "Net::FTP" : "2.58", + "Net::HTTP" : "6.07", + "URI" : "1.10", + "URI::Escape" : "0", + "WWW::RobotRules" : "6", + "perl" : "5.008001" + }, + "suggests" : { + "Authen::NTLM" : "1.02", + "HTTP::GHTTP" : "0", + "LWP::Protocol::https" : "6.02" + } + }, + "test" : { + "requires" : { + "FindBin" : "0", + "Test" : "0", + "Test::More" : "0" + } + } + }, + "release_status" : "stable", + "resources" : { + "bugtracker" : { + "mailto" : "bug-libwww-perl@rt.cpan.org", + "web" : "https://rt.cpan.org/Public/Dist/Display.html?Name=libwww-perl" + }, + "repository" : { + "type" : "git", + "url" : "https://github.com/libwww-perl/libwww-perl.git", + "web" : "https://github.com/libwww-perl/libwww-perl" + }, + "x_IRC" : "irc://irc.perl.org/#lwp", + "x_MailingList" : "mailto:libwww@perl.org" + }, + "version" : "6.13" +} + diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..9a65c0a --- /dev/null +++ b/META.yml @@ -0,0 +1,54 @@ +--- +abstract: 'The World-Wide Web library for Perl' +author: + - 'Gisle Aas <gisle@activestate.com>' +build_requires: + ExtUtils::MakeMaker: '0' + FindBin: '0' + Test: '0' + Test::More: '0' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'ExtUtils::MakeMaker version 7.0512, CPAN::Meta::Converter version 2.143240' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: libwww-perl +no_index: + directory: + - t + - inc +requires: + Digest::MD5: '0' + Encode: '2.12' + Encode::Locale: '0' + File::Listing: '6' + HTML::Entities: '0' + HTML::HeadParser: '0' + HTTP::Cookies: '6' + HTTP::Daemon: '6' + HTTP::Date: '6' + HTTP::Negotiate: '6' + HTTP::Request: '6' + HTTP::Request::Common: '6' + HTTP::Response: '6' + HTTP::Status: '6' + IO::Select: '0' + IO::Socket: '0' + LWP::MediaTypes: '6' + MIME::Base64: '2.1' + Net::FTP: '2.58' + Net::HTTP: '6.07' + URI: '1.10' + URI::Escape: '0' + WWW::RobotRules: '6' + perl: '5.008001' +resources: + IRC: irc://irc.perl.org/#lwp + MailingList: mailto:libwww@perl.org + bugtracker: https://rt.cpan.org/Public/Dist/Display.html?Name=libwww-perl + repository: https://github.com/libwww-perl/libwww-perl.git +version: '6.13' + diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..17158b7 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,138 @@ +#!perl -w + +require 5.008001; +use strict; +use ExtUtils::MakeMaker; +use Getopt::Long qw(GetOptions); + +GetOptions(\my %opt, + 'aliases', + 'no-programs|n', + 'live-tests', + 'jigsaw-tests', +) or do { + die "Usage: $0 [--aliases] [--no-programs] [--live-tests] [--jigsaw-tests]\n"; +}; + +my @prog; +push(@prog, qw(lwp-request lwp-mirror lwp-download lwp-dump)) + unless $opt{'no-programs'} || grep /^LIB=/, @ARGV; + +if ($opt{'aliases'} && grep(/lwp-request/, @prog)) { + require File::Copy; + for (qw(GET HEAD POST)) { + File::Copy::copy("bin/lwp-request", "bin/$_") || die "Can't copy bin/$_"; + chmod(0755, "bin/$_"); + push(@prog, $_); + } +} + +my $tests = 't/base/*.t t/html/*.t t/robot/*.t t/local/*.t t/net/*.t'; +$tests .= ' t/live/*.t' + if $opt{'live-tests'}; +$tests .= ' t/live/jigsaw/*.t' + if $opt{'jigsaw-tests'}; + +my %WriteMakefileArgs = ( + NAME => 'LWP', + DISTNAME => 'libwww-perl', + VERSION_FROM => 'lib/LWP.pm', + ABSTRACT => 'The World-Wide Web library for Perl', + AUTHOR => 'Gisle Aas <gisle@activestate.com>', + EXE_FILES => [ map "bin/$_", @prog ], + LICENSE => 'perl_5', + MIN_PERL_VERSION => 5.008001, + PREREQ_PM => { + 'Digest::MD5' => 0, + 'Encode' => "2.12", + 'Encode::Locale' => 0, + 'File::Listing' => 6, + 'HTML::Entities' => 0, + 'HTML::HeadParser' => 0, + 'HTTP::Cookies' => 6, + 'HTTP::Daemon' => 6, + 'HTTP::Date' => 6, + 'HTTP::Negotiate' => 6, + 'HTTP::Request' => 6, + 'HTTP::Request::Common' => 6, + 'HTTP::Response' => 6, + 'HTTP::Status' => 6, + 'IO::Select' => 0, + 'IO::Socket' => 0, + 'LWP::MediaTypes' => 6, + 'MIME::Base64' => "2.1", + 'Net::FTP' => "2.58", + 'Net::HTTP' => "6.07", + 'URI' => "1.10", + 'URI::Escape' => 0, + 'WWW::RobotRules' => 6, + }, + TEST_REQUIRES => { + 'Test' => '0', + 'Test::More' => '0', + 'FindBin' => '0', + }, + META_MERGE => { + 'meta-spec' => { version => 2 }, + dynamic_config => 0, + prereqs => { + runtime => { + suggests => { + 'LWP::Protocol::https' => '6.02', + 'Authen::NTLM' => "1.02", + 'HTTP::GHTTP' => '0', + }, + }, + }, + resources => { + repository => { + url => 'https://github.com/libwww-perl/libwww-perl.git', + web => 'https://github.com/libwww-perl/libwww-perl', + type => 'git', + }, + bugtracker => { + mailto => 'bug-libwww-perl@rt.cpan.org', + web => 'https://rt.cpan.org/Public/Dist/Display.html?Name=libwww-perl', + }, + x_MailingList => 'mailto:libwww@perl.org', + x_IRC => 'irc://irc.perl.org/#lwp', + } + }, + clean => { FILES => join(" ", map "bin/$_", grep /^[A-Z]+$/, @prog) }, + test => { TESTS => $tests }, +); + +{ + # compatibility with older versions of MakeMaker + my $developer = -f ".gitignore"; + + die 'need to do a merge with CPAN::Meta::Requirements!!' + if $developer && exists $WriteMakefileArgs{BUILD_REQUIRES}; + + if (!eval { ExtUtils::MakeMaker->VERSION('6.6303') }) { + $WriteMakefileArgs{BUILD_REQUIRES} = $WriteMakefileArgs{TEST_REQUIRES}; + delete $WriteMakefileArgs{TEST_REQUIRES}; + } + + if (!eval { ExtUtils::MakeMaker->VERSION('6.5501') }) { + @{$WriteMakefileArgs{PREREQ_PM}}{ keys %{$WriteMakefileArgs{BUILD_REQUIRES}} } = + @{$WriteMakefileArgs{BUILD_REQUIRES}}{ keys %{$WriteMakefileArgs{BUILD_REQUIRES}} }; + + delete $WriteMakefileArgs{BUILD_REQUIRES}; + } + + my %mm_req = ( + LICENCE => 6.31, + META_MERGE => 6.45, + META_ADD => 6.45, + MIN_PERL_VERSION => 6.48, + ); + for (keys %mm_req) { + unless (eval { ExtUtils::MakeMaker->VERSION($mm_req{$_}) }) { + warn "$_ $@" if $developer; + delete $WriteMakefileArgs{$_}; + } + } +} + +WriteMakefile(%WriteMakefileArgs); @@ -0,0 +1,100 @@ + + L I B W W W - P E R L - 6 + ----------------------------- + + +The libwww-perl collection is a set of Perl modules which provides a +simple and consistent application programming interface to the +World-Wide Web. The main focus of the library is to provide classes +and functions that allow you to write WWW clients. The library also +contain modules that are of more general use and even classes that +help you implement simple HTTP servers. + + +PREREQUISITES + +In order to install and use this package you will need Perl version +5.8.1 or better. Some modules within this package depend on other +packages that are distributed separately from Perl. We recommend that +you have the following packages installed before you install +libwww-perl: + + Digest-MD5 + Encode-Locale + HTML-Form + HTML-Parser + HTML-Tagset + HTTP-Cookies + HTTP-Date + HTTP-Message + HTTP-Negotiate + libnet + LWP-MediaTypes + MIME-Base64 + Net-HTTP + URI + WWW-RobotRules + +If you want to access sites using the https protocol, then you need to +install the LWP::Protocol::https module from CPAN. + + +INSTALLATION + +You install libwww-perl using the normal perl module distribution drill: + + perl Makefile.PL + make + make test + make install + +If you don't want to install any programs (only the library files) then +pass the '--no-programs' option to Makefile.PL: + + perl Makefile.PL --no-programs + + +DOCUMENTATION + +See the lib/LWP.pm file for an overview of the library. See the +Changes file for recent changes. + +POD style documentation is included in all modules and scripts. These +are normally converted to manual pages and installed as part of the +"make install" process. You should also be able to use the 'perldoc' +utility to extract and read documentation from the module files +directly. + + +SUPPORT + +Bug reports and suggestions for improvements can be sent to the +<libwww@perl.org> mailing list. This mailing list is also the place +for general discussions and development of the libwww-perl package. + + +AVAILABILITY + +The latest version of libwww-perl is available from CPAN: + + http://search.cpan.org/dist/libwww-perl/ + +If you want to hack on the source it might be a good idea to grab the +latest version with git using the command: + + git clone git://github.com/libwww-perl/libwww-perl.git lwp + +You can also browse the git repository at: + + https://github.com/libwww-perl/libwww-perl + + +COPYRIGHT + + © 1995-2010 Gisle Aas. All rights reserved. + © 1995 Martijn Koster. All rights reserved. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +Enjoy! diff --git a/README.SSL b/README.SSL new file mode 100644 index 0000000..3c2202c --- /dev/null +++ b/README.SSL @@ -0,0 +1,7 @@ +As of libwww-perl v6.02 you need to install the LWP::Protocol::https module +from its own separate distribution to enable support for https://... URLs for +LWP::UserAgent. + +This makes it possible for that distribution to state the required dependencies +as non-optional. See <https://rt.cpan.org/Ticket/Display.html?id=66838> for +further discussion why we ended up with this solution. diff --git a/bin/lwp-download b/bin/lwp-download new file mode 100755 index 0000000..bf32a85 --- /dev/null +++ b/bin/lwp-download @@ -0,0 +1,330 @@ +#!/usr/bin/perl -w + +=head1 NAME + +lwp-download - Fetch large files from the web + +=head1 SYNOPSIS + +B<lwp-download> [B<-a>] [B<-s>] <I<url>> [<I<local path>>] + +=head1 DESCRIPTION + +The B<lwp-download> program will save the file at I<url> to a local +file. + +If I<local path> is not specified, then the current directory is +assumed. + +If I<local path> is a directory, then the last segment of the path of the +I<url> is appended to form a local filename. If the I<url> path ends with +slash the name "index" is used. With the B<-s> option pick up the last segment +of the filename from server provided sources like the Content-Disposition +header or any redirect URLs. A file extension to match the server reported +Content-Type might also be appended. If a file with the produced filename +already exists, then B<lwp-download> will prompt before it overwrites and will +fail if its standard input is not a terminal. This form of invocation will +also fail is no acceptable filename can be derived from the sources mentioned +above. + +If I<local path> is not a directory, then it is simply used as the +path to save into. If the file already exists it's overwritten. + +The I<lwp-download> program is implemented using the I<libwww-perl> +library. It is better suited to down load big files than the +I<lwp-request> program because it does not store the file in memory. +Another benefit is that it will keep you updated about its progress +and that you don't have much options to worry about. + +Use the C<-a> option to save the file in text (ascii) mode. Might +make a difference on DOSish systems. + +=head1 EXAMPLE + +Fetch the newest and greatest perl version: + + $ lwp-download http://www.perl.com/CPAN/src/latest.tar.gz + Saving to 'latest.tar.gz'... + 11.4 MB received in 8 seconds (1.43 MB/sec) + +=head1 AUTHOR + +Gisle Aas <gisle@aas.no> + +=cut + +#' get emacs out of quote mode + +use strict; + +use LWP::UserAgent (); +use LWP::MediaTypes qw(guess_media_type media_suffix); +use URI (); +use HTTP::Date (); +use Encode; +use Encode::Locale; + +my $progname = $0; +$progname =~ s,.*/,,; # only basename left in progname +$progname =~ s,.*\\,, if $^O eq "MSWin32"; +$progname =~ s/\.\w*$//; # strip extension if any + +#parse option +use Getopt::Std; +my %opt; +unless (getopts('as', \%opt)) { + usage(); +} + +my $url = URI->new(decode(locale => shift) || usage()); +my $argfile = encode(locale_fs => decode(locale => shift)); +usage() if defined($argfile) && !length($argfile); +my $VERSION = "6.09"; + +my $ua = LWP::UserAgent->new( + agent => "lwp-download/$VERSION ", + keep_alive => 1, + env_proxy => 1, +); + +my $file; # name of file we download into +my $length; # total number of bytes to download +my $flength; # formatted length +my $size = 0; # number of bytes received +my $start_t; # start time of download +my $last_dur; # time of last callback + +my $shown = 0; # have we called the show() function yet + +$SIG{INT} = sub { die "Interrupted\n"; }; + +$| = 1; # autoflush + +my $res = $ua->request(HTTP::Request->new(GET => $url), + sub { + unless(defined $file) { + my $res = $_[1]; + + my $directory; + if (defined $argfile && -d $argfile) { + ($directory, $argfile) = ($argfile, undef); + } + + unless (defined $argfile) { + # find a suitable name to use + $file = $opt{s} && $res->filename; + + # if this fails we try to make something from the URL + unless ($file) { + $file = ($url->path_segments)[-1]; + if (!defined($file) || !length($file)) { + $file = "index"; + my $suffix = media_suffix($res->content_type); + $file .= ".$suffix" if $suffix; + } + elsif ($url->scheme eq 'ftp' || + $file =~ /\.t[bg]z$/ || + $file =~ /\.tar(\.(Z|gz|bz2?))?$/ + ) { + # leave the filename as it was + } + else { + my $ct = guess_media_type($file); + unless ($ct eq $res->content_type) { + # need a better suffix for this type + my $suffix = media_suffix($res->content_type); + $file .= ".$suffix" if $suffix; + } + } + } + + # validate that we don't have a harmful filename now. The server + # might try to trick us into doing something bad. + if (!length($file) || + $file =~ s/([^a-zA-Z0-9_\.\-\+\~])/sprintf "\\x%02x", ord($1)/ge || + $file =~ /^\./ + ) + { + die "Will not save <$url> as \"$file\".\nPlease override file name on the command line.\n"; + } + + if (defined $directory) { + require File::Spec; + $file = File::Spec->catfile($directory, $file); + } + + # Check if the file is already present + if (-l $file) { + die "Will not save <$url> to link \"$file\".\nPlease override file name on the command line.\n"; + } + elsif (-f _) { + die "Will not save <$url> as \"$file\" without verification.\nEither run from terminal or override file name on the command line.\n" + unless -t; + $shown = 1; + print "Overwrite $file? [y] "; + my $ans = <STDIN>; + unless (defined($ans) && $ans =~ /^y?\n/) { + if (defined $ans) { + print "Ok, aborting.\n"; + } + else { + print "\nAborting.\n"; + } + exit 1; + } + $shown = 0; + } + elsif (-e _) { + die "Will not save <$url> as \"$file\". Path exists.\n"; + } + else { + print "Saving to '$file'...\n"; + use Fcntl qw(O_WRONLY O_EXCL O_CREAT); + sysopen(FILE, $file, O_WRONLY|O_EXCL|O_CREAT) || + die "Can't open $file: $!"; + } + } + else { + $file = $argfile; + } + unless (fileno(FILE)) { + open(FILE, ">", $file) || die "Can't open $file: $!\n"; + } + binmode FILE unless $opt{a}; + $length = $res->content_length; + $flength = fbytes($length) if defined $length; + $start_t = time; + $last_dur = 0; + } + + print FILE $_[0] or die "Can't write to $file: $!\n"; + $size += length($_[0]); + + if (defined $length) { + my $dur = time - $start_t; + if ($dur != $last_dur) { # don't update too often + $last_dur = $dur; + my $perc = $size / $length; + my $speed; + $speed = fbytes($size/$dur) . "/sec" if $dur > 3; + my $secs_left = fduration($dur/$perc - $dur); + $perc = int($perc*100); + my $show = "$perc% of $flength"; + $show .= " (at $speed, $secs_left remaining)" if $speed; + show($show, 1); + } + } + else { + show( fbytes($size) . " received"); + } + } +); + +if (fileno(FILE)) { + close(FILE) || die "Can't write to $file: $!\n"; + + show(""); # clear text + print "\r"; + print fbytes($size); + print " of ", fbytes($length) if defined($length) && $length != $size; + print " received"; + my $dur = time - $start_t; + if ($dur) { + my $speed = fbytes($size/$dur) . "/sec"; + print " in ", fduration($dur), " ($speed)"; + } + print "\n"; + + if (my $mtime = $res->last_modified) { + utime time, $mtime, $file; + } + + if ($res->header("X-Died") || !$res->is_success) { + if (my $died = $res->header("X-Died")) { + print "$died\n"; + } + if (-t) { + print "Transfer aborted. Delete $file? [n] "; + my $ans = <STDIN>; + if (defined($ans) && $ans =~ /^y\n/) { + unlink($file) && print "Deleted.\n"; + } + elsif ($length > $size) { + print "Truncated file kept: ", fbytes($length - $size), " missing\n"; + } + else { + print "File kept.\n"; + } + exit 1; + } + else { + print "Transfer aborted, $file kept\n"; + } + } + exit 0; +} + +# Did not manage to create any file +print "\n" if $shown; +if (my $xdied = $res->header("X-Died")) { + print "$progname: Aborted\n$xdied\n"; +} +else { + print "$progname: ", $res->status_line, "\n"; +} +exit 1; + + +sub fbytes +{ + my $n = int(shift); + if ($n >= 1024 * 1024) { + return sprintf "%.3g MB", $n / (1024.0 * 1024); + } + elsif ($n >= 1024) { + return sprintf "%.3g KB", $n / 1024.0; + } + else { + return "$n bytes"; + } +} + +sub fduration +{ + use integer; + my $secs = int(shift); + my $hours = $secs / (60*60); + $secs -= $hours * 60*60; + my $mins = $secs / 60; + $secs %= 60; + if ($hours) { + return "$hours hours $mins minutes"; + } + elsif ($mins >= 2) { + return "$mins minutes"; + } + else { + $secs += $mins * 60; + return "$secs seconds"; + } +} + + +BEGIN { + my @ani = qw(- \ | /); + my $ani = 0; + + sub show + { + my($mess, $show_ani) = @_; + print "\r$mess" . (" " x (75 - length $mess)); + print $show_ani ? "$ani[$ani++]\b" : " "; + $ani %= @ani; + $shown++; + } +} + +sub usage +{ + die "Usage: $progname [-a] <url> [<lpath>]\n"; +} diff --git a/bin/lwp-dump b/bin/lwp-dump new file mode 100755 index 0000000..4faa414 --- /dev/null +++ b/bin/lwp-dump @@ -0,0 +1,120 @@ +#!/usr/bin/perl -w + +use strict; +use LWP::UserAgent (); +use Getopt::Long qw(GetOptions); +use Encode; +use Encode::Locale; + +my $VERSION = "6.09"; + +GetOptions(\my %opt, + 'parse-head', + 'max-length=n', + 'keep-client-headers', + 'method=s', + 'agent=s', + 'request', +) || usage(); + +my $url = shift || usage(); +@ARGV && usage(); + +sub usage { + (my $progname = $0) =~ s,.*/,,; + die <<"EOT"; +Usage: $progname [options] <url> + +Recognized options are: + --agent <str> + --keep-client-headers + --max-length <n> + --method <str> + --parse-head + --request + +EOT +} + +my $ua = LWP::UserAgent->new( + parse_head => $opt{'parse-head'} || 0, + keep_alive => 1, + env_proxy => 1, + agent => $opt{agent} || "lwp-dump/$VERSION ", +); + +my $req = HTTP::Request->new($opt{method} || 'GET' => decode(locale => $url)); +my $res = $ua->simple_request($req); +$res->remove_header(grep /^Client-/, $res->header_field_names) + unless $opt{'keep-client-headers'} or + ($res->header("Client-Warning") || "") eq "Internal response"; + +if ($opt{request}) { + $res->request->dump; + print "\n"; +} + +$res->dump(maxlength => $opt{'max-length'}); + +__END__ + +=head1 NAME + +lwp-dump - See what headers and content is returned for a URL + +=head1 SYNOPSIS + +B<lwp-dump> [ I<options> ] I<URL> + +=head1 DESCRIPTION + +The B<lwp-dump> program will get the resource identified by the URL and then +dump the response object to STDOUT. This will display the headers returned and +the initial part of the content, escaped so that it's safe to display even +binary content. The escapes syntax used is the same as for Perl's double +quoted strings. If there is no content the string "(no content)" is shown in +its place. + +The following options are recognized: + +=over + +=item B<--agent> I<str> + +Override the user agent string passed to the server. + +=item B<--keep-client-headers> + +LWP internally generate various C<Client-*> headers that are stripped by +B<lwp-dump> in order to show the headers exactly as the server provided them. +This option will suppress this. + +=item B<--max-length> I<n> + +How much of the content to show. The default is 512. Set this +to 0 for unlimited. + +If the content is longer then the string is chopped at the +limit and the string "...\n(### more bytes not shown)" +appended. + +=item B<--method> I<str> + +Use the given method for the request instead of the default "GET". + +=item B<--parse-head> + +By default B<lwp-dump> will not try to initialize headers by looking at the +head section of HTML documents. This option enables this. This corresponds to +L<LWP::UserAgent/"parse_head">. + +=item B<--request> + +Also dump the request sent. + +=back + +=head1 SEE ALSO + +L<lwp-request>, L<LWP>, L<HTTP::Message/"dump"> + diff --git a/bin/lwp-mirror b/bin/lwp-mirror new file mode 100755 index 0000000..5d8c401 --- /dev/null +++ b/bin/lwp-mirror @@ -0,0 +1,105 @@ +#!/usr/bin/perl -w + +# Simple mirror utility using LWP + +=head1 NAME + +lwp-mirror - Simple mirror utility + +=head1 SYNOPSIS + + lwp-mirror [-v] [-t timeout] <url> <local file> + +=head1 DESCRIPTION + +This program can be used to mirror a document from a WWW server. The +document is only transferred if the remote copy is newer than the local +copy. If the local copy is newer nothing happens. + +Use the C<-v> option to print the version number of this program. + +The timeout value specified with the C<-t> option. The timeout value +is the time that the program will wait for response from the remote +server before it fails. The default unit for the timeout value is +seconds. You might append "m" or "h" to the timeout value to make it +minutes or hours, respectively. + +Because this program is implemented using the LWP library, it only +supports the protocols that LWP supports. + +=head1 SEE ALSO + +L<lwp-request>, L<LWP> + +=head1 AUTHOR + +Gisle Aas <gisle@aas.no> + +=cut + + +use LWP::Simple qw(mirror is_success status_message $ua); +use Getopt::Std; +use Encode; +use Encode::Locale; + +$progname = $0; +$progname =~ s,.*/,,; # use basename only +$progname =~ s/\.\w*$//; #strip extension if any + +$VERSION = "6.09"; + +$opt_h = undef; # print usage +$opt_v = undef; # print version +$opt_t = undef; # timeout + +unless (getopts("hvt:")) { + usage(); +} + +if ($opt_v) { + require LWP; + my $DISTNAME = 'libwww-perl-' . LWP::Version(); + die <<"EOT"; +This is lwp-mirror version $VERSION ($DISTNAME) + +Copyright 1995-1999, Gisle Aas. + +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. +EOT +} + +$url = decode(locale => shift) or usage(); +$file = encode(locale_fs => decode(locale => shift)) or usage(); +usage() if $opt_h or @ARGV; + +if (defined $opt_t) { + $opt_t =~ /^(\d+)([smh])?/; + die "$progname: Illegal timeout value!\n" unless defined $1; + $timeout = $1; + $timeout *= 60 if ($2 eq "m"); + $timeout *= 3600 if ($2 eq "h"); + $ua->timeout($timeout); +} + +$rc = mirror($url, $file); + +if ($rc == 304) { + print STDERR "$progname: $file is up to date\n" +} +elsif (!is_success($rc)) { + print STDERR "$progname: $rc ", status_message($rc), " ($url)\n"; + exit 1; +} +exit; + + +sub usage +{ + die <<"EOT"; +Usage: $progname [-options] <url> <file> + -v print version number of program + -t <timeout> Set timeout value +EOT +} diff --git a/bin/lwp-request b/bin/lwp-request new file mode 100755 index 0000000..d934404 --- /dev/null +++ b/bin/lwp-request @@ -0,0 +1,552 @@ +#!/usr/bin/perl -w + +# Simple user agent using LWP library. + +=head1 NAME + +lwp-request, GET, POST, HEAD - Simple command line user agent + +=head1 SYNOPSIS + +B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>] + [B<-i> I<if-modified-since>] [B<-c> I<content-type>] + [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>... + +=head1 DESCRIPTION + +This program can be used to send requests to WWW servers and your +local file system. The request content for POST and PUT +methods is read from stdin. The content of the response is printed on +stdout. Error messages are printed on stderr. The program returns a +status value indicating the number of URLs that failed. + +The options are: + +=over 4 + +=item -m <method> + +Set which method to use for the request. If this option is not used, +then the method is derived from the name of the program. + +=item -f + +Force request through, even if the program believes that the method is +illegal. The server might reject the request eventually. + +=item -b <uri> + +This URI will be used as the base URI for resolving all relative URIs +given as argument. + +=item -t <timeout> + +Set the timeout value for the requests. The timeout is the amount of +time that the program will wait for a response from the remote server +before it fails. The default unit for the timeout value is seconds. +You might append "m" or "h" to the timeout value to make it minutes or +hours, respectively. The default timeout is '3m', i.e. 3 minutes. + +=item -i <time> + +Set the If-Modified-Since header in the request. If I<time> is the +name of a file, use the modification timestamp for this file. If +I<time> is not a file, it is parsed as a literal date. Take a look at +L<HTTP::Date> for recognized formats. + +=item -c <content-type> + +Set the Content-Type for the request. This option is only allowed for +requests that take a content, i.e. POST and PUT. You can +force methods to take content by using the C<-f> option together with +C<-c>. The default Content-Type for POST is +C<application/x-www-form-urlencoded>. The default Content-type for +the others is C<text/plain>. + +=item -p <proxy-url> + +Set the proxy to be used for the requests. The program also loads +proxy settings from the environment. You can disable this with the +C<-P> option. + +=item -P + +Don't load proxy settings from environment. + +=item -H <header> + +Send this HTTP header with each request. You can specify several, e.g.: + + lwp-request \ + -H 'Referer: http://other.url/' \ + -H 'Host: somehost' \ + http://this.url/ + +=item -C <username>:<password> + +Provide credentials for documents that are protected by Basic +Authentication. If the document is protected and you did not specify +the username and password with this option, then you will be prompted +to provide these values. + +=back + +The following options controls what is displayed by the program: + +=over 4 + +=item -u + +Print request method and absolute URL as requests are made. + +=item -U + +Print request headers in addition to request method and absolute URL. + +=item -s + +Print response status code. This option is always on for HEAD requests. + +=item -S + +Print response status chain. This shows redirect and authorization +requests that are handled by the library. + +=item -e + +Print response headers. This option is always on for HEAD requests. + +=item -E + +Print response status chain with full response headers. + +=item -d + +Do B<not> print the content of the response. + +=item -o <format> + +Process HTML content in various ways before printing it. If the +content type of the response is not HTML, then this option has no +effect. The legal format values are; I<text>, I<ps>, I<links>, +I<html> and I<dump>. + +If you specify the I<text> format then the HTML will be formatted as +plain latin1 text. If you specify the I<ps> format then it will be +formatted as Postscript. + +The I<links> format will output all links found in the HTML document. +Relative links will be expanded to absolute ones. + +The I<html> format will reformat the HTML code and the I<dump> format +will just dump the HTML syntax tree. + +Note that the C<HTML-Tree> distribution needs to be installed for this +option to work. In addition the C<HTML-Format> distribution needs to +be installed for I<-o text> or I<-o ps> to work. + +=item -v + +Print the version number of the program and quit. + +=item -h + +Print usage message and quit. + +=item -a + +Set text(ascii) mode for content input and output. If this option is not +used, content input and output is done in binary mode. + +=back + +Because this program is implemented using the LWP library, it will +only support the protocols that LWP supports. + +=head1 SEE ALSO + +L<lwp-mirror>, L<LWP> + +=head1 COPYRIGHT + +Copyright 1995-1999 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 AUTHOR + +Gisle Aas <gisle@aas.no> + +=cut + +$progname = $0; +$progname =~ s,.*[\\/],,; # use basename only +$progname =~ s/\.\w*$//; # strip extension, if any + +$VERSION = "6.09"; + + +require LWP; + +use URI; +use URI::Heuristic qw(uf_uri); +use Encode; +use Encode::Locale; + +use HTTP::Status qw(status_message); +use HTTP::Date qw(time2str str2time); + + +# This table lists the methods that are allowed. It should really be +# a superset for all methods supported for every scheme that may be +# supported by the library. Currently it might be a bit too HTTP +# specific. You might use the -f option to force a method through. +# +# "" = No content in request, "C" = Needs content in request +# +%allowed_methods = ( + GET => "", + HEAD => "", + POST => "C", + PUT => "C", + DELETE => "", + TRACE => "", + OPTIONS => "", +); + + +# We make our own specialization of LWP::UserAgent that asks for +# user/password if document is protected. +{ + package RequestAgent; + @ISA = qw(LWP::UserAgent); + + sub new + { + my $self = LWP::UserAgent::new(@_); + $self->agent("lwp-request/$main::VERSION "); + $self; + } + + sub get_basic_credentials + { + my($self, $realm, $uri) = @_; + if ($main::options{'C'}) { + return split(':', $main::options{'C'}, 2); + } + elsif (-t) { + my $netloc = $uri->host_port; + print STDERR "Enter username for $realm at $netloc: "; + my $user = <STDIN>; + chomp($user); + return (undef, undef) unless length $user; + print STDERR "Password: "; + system("stty -echo"); + my $password = <STDIN>; + system("stty echo"); + print STDERR "\n"; # because we disabled echo + chomp($password); + return ($user, $password); + } + else { + return (undef, undef) + } + } +} + +$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname); + +# Parse command line +use Getopt::Long; + +my @getopt_args = ( + 'a', # content i/o in text(ascii) mode + 'm=s', # set method + 'f', # make request even if method is not in %allowed_methods + 'b=s', # base url + 't=s', # timeout + 'i=s', # if-modified-since + 'c=s', # content type for POST + 'C=s', # credentials for basic authorization + 'H=s@', # extra headers, form "Header: value string" + # + 'u', # display method and URL of request + 'U', # display request headers also + 's', # display status code + 'S', # display whole chain of status codes + 'e', # display response headers (default for HEAD) + 'E', # display whole chain of headers + 'd', # don't display content + # + 'h', # print usage + 'v', # print version + # + 'p=s', # proxy URL + 'P', # don't load proxy setting from environment + # + 'o=s', # output format +); + +Getopt::Long::config("noignorecase", "bundling"); +unless (GetOptions(\%options, @getopt_args)) { + usage(); +} +if ($options{'v'}) { + require LWP; + my $DISTNAME = 'libwww-perl-' . LWP::Version(); + die <<"EOT"; +This is lwp-request version $VERSION ($DISTNAME) + +Copyright 1995-1999, Gisle Aas. + +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. +EOT +} + +usage() if $options{'h'} || !@ARGV; + +# Create the user agent object +$ua = RequestAgent->new; + +# Load proxy settings from *_proxy environment variables. +$ua->env_proxy unless $options{'P'}; + +$method = uc($options{'m'}) if defined $options{'m'}; + +if ($options{'f'}) { + if ($options{'c'}) { + $allowed_methods{$method} = "C"; # force content + } + else { + $allowed_methods{$method} = ""; + } +} +elsif (!defined $allowed_methods{$method}) { + die "$progname: $method is not an allowed method\n"; +} + +if ($options{'S'} || $options{'E'}) { + $options{'U'} = 1 if $options{'E'}; + $options{'E'} = 1 if $options{'e'}; + $options{'S'} = 1; + $options{'s'} = 1; + $options{'u'} = 1; +} + +if ($method eq "HEAD") { + $options{'s'} = 1; + $options{'e'} = 1 unless $options{'d'}; + $options{'d'} = 1; +} + +$options{'u'} = 1 if $options{'U'}; +$options{'s'} = 1 if $options{'e'}; + +if (defined $options{'t'}) { + $options{'t'} =~ /^(\d+)([smh])?/; + die "$progname: Illegal timeout value!\n" unless defined $1; + $timeout = $1; + if (defined $2) { + $timeout *= 60 if $2 eq "m"; + $timeout *= 3600 if $2 eq "h"; + } + $ua->timeout($timeout); +} + +if (defined $options{'i'}) { + if (-e $options{'i'}) { + $time = (stat _)[9]; + } + else { + $time = str2time($options{'i'}); + die "$progname: Illegal time syntax for -i option\n" + unless defined $time; + } + $options{'i'} = time2str($time); +} + +$content = undef; +$user_ct = undef; +if ($allowed_methods{$method} eq "C") { + # This request needs some content + unless (defined $options{'c'}) { + # set default content type + $options{'c'} = ($method eq "POST") ? + "application/x-www-form-urlencoded" + : "text/plain"; + } + else { + die "$progname: Illegal Content-type format\n" + unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,; + $user_ct++; + } + print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n" + if -t; + binmode STDIN unless -t or $options{'a'}; + $content = join("", <STDIN>); +} +else { + die "$progname: Can't set Content-type for $method requests\n" + if defined $options{'c'}; +} + +# Set up a request. We will use the same request object for all URLs. +$request = HTTP::Request->new($method); +$request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'}; +for my $user_header (@{ $options{'H'} || [] }) { + my ($header_name, $header_value) = split /\s*:\s*/, $user_header, 2; + $header_name =~ s/^\s+//; + if (lc($header_name) eq "user-agent") { + $header_value .= $ua->agent if $header_value =~ /\s\z/; + $ua->agent($header_value); + } + else { + $request->push_header($header_name, $header_value); + } +} +#$request->header('Accept', '*/*'); +if ($options{'c'}) { # will always be set for request that wants content + my $header = ($user_ct ? 'header' : 'init_header'); + $request->$header('Content-Type', $options{'c'}); + $request->header('Content-Length', length $content); # Not really needed + $request->content($content); +} + +$errors = 0; + +sub show { + my $r = shift; + my $last = shift; + print $method, " ", $r->request->uri->as_string, "\n" if $options{'u'}; + print $r->request->headers_as_string, "\n" if $options{'U'}; + print $r->status_line, "\n" if $options{'s'}; + print $r->headers_as_string, "\n" if $options{'E'} or $last; +} + +# Ok, now we perform the requests, one URL at a time +while ($url = shift) { + # Create the URL object, but protect us against bad URLs + eval { + if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification + $url = URI->new(decode(locale => $url), decode(locale => $options{'b'})); + $url = $url->abs(decode(locale => $options{'b'})) if $options{'b'}; + } + else { + $url = uf_uri($url); + } + }; + if ($@) { + $@ =~ s/ at .* line \d+.*//; + print STDERR $@; + $errors++; + next; + } + + $ua->proxy($url->scheme, decode(locale => $options{'p'})) if $options{'p'}; + + # Send the request and get a response back from the server + $request->uri($url); + $response = $ua->request($request); + + if ($options{'S'}) { + for my $r ($response->redirects) { + show($r); + } + } + show($response, $options{'e'}); + + unless ($options{'d'}) { + if ($options{'o'} && + $response->content_type eq 'text/html') { + eval { + require HTML::Parse; + }; + if ($@) { + if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) { + die "The HTML-Tree distribution need to be installed for the -o option to be used.\n"; + } + else { + die $@; + } + } + my $html = HTML::Parse::parse_html($response->content); + { + $options{'o'} eq 'ps' && do { + require HTML::FormatPS; + my $f = HTML::FormatPS->new; + print $f->format($html); + last; + }; + $options{'o'} eq 'text' && do { + require HTML::FormatText; + my $f = HTML::FormatText->new; + print $f->format($html); + last; + }; + $options{'o'} eq 'html' && do { + print $html->as_HTML; + last; + }; + $options{'o'} eq 'links' && do { + my $base = $response->base; + $base = $options{'b'} if $options{'b'}; + for ( @{ $html->extract_links } ) { + my($link, $elem) = @$_; + my $tag = uc $elem->tag; + $link = URI->new($link)->abs($base)->as_string; + print "$tag\t$link\n"; + } + last; + }; + $options{'o'} eq 'dump' && do { + $html->dump; + last; + }; + # It is bad to not notice this before now :-( + die "Illegal -o option value ($options{'o'})\n"; + } + } + else { + binmode STDOUT unless $options{'a'}; + print $response->content; + } + } + + $errors++ unless $response->is_success; +} + +exit $errors; + + +sub usage +{ + die <<"EOT"; +Usage: $progname [-options] <url>... + -m <method> use method for the request (default is '$method') + -f make request even if $progname believes method is illegal + -b <base> Use the specified URL as base + -t <timeout> Set timeout value + -i <time> Set the If-Modified-Since header on the request + -c <conttype> use this content-type for POST, PUT, CHECKIN + -a Use text mode for content I/O + -p <proxyurl> use this as a proxy + -P don't load proxy settings from environment + -H <header> send this HTTP header (you can specify several) + -C <username>:<password> + provide credentials for basic authentication + + -u Display method and URL before any response + -U Display request headers (implies -u) + -s Display response status code + -S Display response status chain (implies -u) + -e Display response headers (implies -s) + -E Display whole chain of headers (implies -S and -U) + -d Do not display content + -o <format> Process HTML content in various ways + + -v Show program version + -h Print this message +EOT +} diff --git a/lib/LWP.pm b/lib/LWP.pm new file mode 100644 index 0000000..fa5d109 --- /dev/null +++ b/lib/LWP.pm @@ -0,0 +1,669 @@ +package LWP; + +$VERSION = "6.13"; +sub Version { $VERSION; } + +require 5.008; +require LWP::UserAgent; # this should load everything you need + +1; + +__END__ + +=encoding utf-8 + +=head1 NAME + +LWP - The World-Wide Web library for Perl + +=head1 SYNOPSIS + + use LWP; + print "This is libwww-perl-$LWP::VERSION\n"; + + +=head1 DESCRIPTION + +The libwww-perl collection is a set of Perl modules which provides a +simple and consistent application programming interface (API) to the +World-Wide Web. The main focus of the library is to provide classes +and functions that allow you to write WWW clients. The library also +contain modules that are of more general use and even classes that +help you implement simple HTTP servers. + +Most modules in this library provide an object oriented API. The user +agent, requests sent and responses received from the WWW server are +all represented by objects. This makes a simple and powerful +interface to these services. The interface is easy to extend +and customize for your own needs. + +The main features of the library are: + +=over 3 + +=item * + +Contains various reusable components (modules) that can be +used separately or together. + +=item * + +Provides an object oriented model of HTTP-style communication. Within +this framework we currently support access to http, https, gopher, ftp, news, +file, and mailto resources. + +=item * + +Provides a full object oriented interface or +a very simple procedural interface. + +=item * + +Supports the basic and digest authorization schemes. + +=item * + +Supports transparent redirect handling. + +=item * + +Supports access through proxy servers. + +=item * + +Provides parser for F<robots.txt> files and a framework for constructing robots. + +=item * + +Supports parsing of HTML forms. + +=item * + +Implements HTTP content negotiation algorithm that can +be used both in protocol modules and in server scripts (like CGI +scripts). + +=item * + +Supports HTTP cookies. + +=item * + +Some simple command line clients, for instance C<lwp-request> and C<lwp-download>. + +=back + + +=head1 HTTP STYLE COMMUNICATION + + +The libwww-perl library is based on HTTP style communication. This +section tries to describe what that means. + +Let us start with this quote from the HTTP specification document +<URL:http://www.w3.org/Protocols/>: + +=over 3 + +=item * + +The HTTP protocol is based on a request/response paradigm. A client +establishes a connection with a server and sends a request to the +server in the form of a request method, URI, and protocol version, +followed by a MIME-like message containing request modifiers, client +information, and possible body content. The server responds with a +status line, including the message's protocol version and a success or +error code, followed by a MIME-like message containing server +information, entity meta-information, and possible body content. + +=back + +What this means to libwww-perl is that communication always take place +through these steps: First a I<request> object is created and +configured. This object is then passed to a server and we get a +I<response> object in return that we can examine. A request is always +independent of any previous requests, i.e. the service is stateless. +The same simple model is used for any kind of service we want to +access. + +For example, if we want to fetch a document from a remote file server, +then we send it a request that contains a name for that document and +the response will contain the document itself. If we access a search +engine, then the content of the request will contain the query +parameters and the response will contain the query result. If we want +to send a mail message to somebody then we send a request object which +contains our message to the mail server and the response object will +contain an acknowledgment that tells us that the message has been +accepted and will be forwarded to the recipient(s). + +It is as simple as that! + + +=head2 The Request Object + +The libwww-perl request object has the class name C<HTTP::Request>. +The fact that the class name uses C<HTTP::> as a +prefix only implies that we use the HTTP model of communication. It +does not limit the kind of services we can try to pass this I<request> +to. For instance, we will send C<HTTP::Request>s both to ftp and +gopher servers, as well as to the local file system. + +The main attributes of the request objects are: + +=over 3 + +=item * + +B<method> is a short string that tells what kind of +request this is. The most common methods are B<GET>, B<PUT>, +B<POST> and B<HEAD>. + +=item * + +B<uri> is a string denoting the protocol, server and +the name of the "document" we want to access. The B<uri> might +also encode various other parameters. + +=item * + +B<headers> contains additional information about the +request and can also used to describe the content. The headers +are a set of keyword/value pairs. + +=item * + +B<content> is an arbitrary amount of data. + +=back + +=head2 The Response Object + +The libwww-perl response object has the class name C<HTTP::Response>. +The main attributes of objects of this class are: + +=over 3 + +=item * + +B<code> is a numerical value that indicates the overall +outcome of the request. + +=item * + +B<message> is a short, human readable string that +corresponds to the I<code>. + +=item * + +B<headers> contains additional information about the +response and describe the content. + +=item * + +B<content> is an arbitrary amount of data. + +=back + +Since we don't want to handle all possible I<code> values directly in +our programs, a libwww-perl response object has methods that can be +used to query what kind of response this is. The most commonly used +response classification methods are: + +=over 3 + +=item is_success() + +The request was successfully received, understood or accepted. + +=item is_error() + +The request failed. The server or the resource might not be +available, access to the resource might be denied or other things might +have failed for some reason. + +=back + +=head2 The User Agent + +Let us assume that we have created a I<request> object. What do we +actually do with it in order to receive a I<response>? + +The answer is that you pass it to a I<user agent> object and this +object takes care of all the things that need to be done +(like low-level communication and error handling) and returns +a I<response> object. The user agent represents your +application on the network and provides you with an interface that +can accept I<requests> and return I<responses>. + +The user agent is an interface layer between +your application code and the network. Through this interface you are +able to access the various servers on the network. + +The class name for the user agent is C<LWP::UserAgent>. Every +libwww-perl application that wants to communicate should create at +least one object of this class. The main method provided by this +object is request(). This method takes an C<HTTP::Request> object as +argument and (eventually) returns a C<HTTP::Response> object. + +The user agent has many other attributes that let you +configure how it will interact with the network and with your +application. + +=over 3 + +=item * + +B<timeout> specifies how much time we give remote servers to +respond before the library disconnects and creates an +internal I<timeout> response. + +=item * + +B<agent> specifies the name that your application uses when it +presents itself on the network. + +=item * + +B<from> can be set to the e-mail address of the person +responsible for running the application. If this is set, then the +address will be sent to the servers with every request. + +=item * + +B<parse_head> specifies whether we should initialize response +headers from the E<lt>head> section of HTML documents. + +=item * + +B<proxy> and B<no_proxy> specify if and when to go through +a proxy server. <URL:http://www.w3.org/History/1994/WWW/Proxies/> + +=item * + +B<credentials> provides a way to set up user names and +passwords needed to access certain services. + +=back + +Many applications want even more control over how they interact +with the network and they get this by sub-classing +C<LWP::UserAgent>. The library includes a +sub-class, C<LWP::RobotUA>, for robot applications. + +=head2 An Example + +This example shows how the user agent, a request and a response are +represented in actual perl code: + + # Create a user agent object + use LWP::UserAgent; + my $ua = LWP::UserAgent->new; + $ua->agent("MyApp/0.1 "); + + # Create a request + my $req = HTTP::Request->new(POST => 'http://search.cpan.org/search'); + $req->content_type('application/x-www-form-urlencoded'); + $req->content('query=libwww-perl&mode=dist'); + + # Pass request to the user agent and get a response back + my $res = $ua->request($req); + + # Check the outcome of the response + if ($res->is_success) { + print $res->content; + } + else { + print $res->status_line, "\n"; + } + +The $ua is created once when the application starts up. New request +objects should normally created for each request sent. + + +=head1 NETWORK SUPPORT + +This section discusses the various protocol schemes and +the HTTP style methods that headers may be used for each. + +For all requests, a "User-Agent" header is added and initialized from +the $ua->agent attribute before the request is handed to the network +layer. In the same way, a "From" header is initialized from the +$ua->from attribute. + +For all responses, the library adds a header called "Client-Date". +This header holds the time when the response was received by +your application. The format and semantics of the header are the +same as the server created "Date" header. You may also encounter other +"Client-XXX" headers. They are all generated by the library +internally and are not received from the servers. + +=head2 HTTP Requests + +HTTP requests are just handed off to an HTTP server and it +decides what happens. Few servers implement methods beside the usual +"GET", "HEAD", "POST" and "PUT", but CGI-scripts may implement +any method they like. + +If the server is not available then the library will generate an +internal error response. + +The library automatically adds a "Host" and a "Content-Length" header +to the HTTP request before it is sent over the network. + +For a GET request you might want to add a "If-Modified-Since" or +"If-None-Match" header to make the request conditional. + +For a POST request you should add the "Content-Type" header. When you +try to emulate HTML E<lt>FORM> handling you should usually let the value +of the "Content-Type" header be "application/x-www-form-urlencoded". +See L<lwpcook> for examples of this. + +The libwww-perl HTTP implementation currently support the HTTP/1.1 +and HTTP/1.0 protocol. + +The library allows you to access proxy server through HTTP. This +means that you can set up the library to forward all types of request +through the HTTP protocol module. See L<LWP::UserAgent> for +documentation of this. + + +=head2 HTTPS Requests + +HTTPS requests are HTTP requests over an encrypted network connection +using the SSL protocol developed by Netscape. Everything about HTTP +requests above also apply to HTTPS requests. In addition the library +will add the headers "Client-SSL-Cipher", "Client-SSL-Cert-Subject" and +"Client-SSL-Cert-Issuer" to the response. These headers denote the +encryption method used and the name of the server owner. + +The request can contain the header "If-SSL-Cert-Subject" in order to +make the request conditional on the content of the server certificate. +If the certificate subject does not match, no request is sent to the +server and an internally generated error response is returned. The +value of the "If-SSL-Cert-Subject" header is interpreted as a Perl +regular expression. + + +=head2 FTP Requests + +The library currently supports GET, HEAD and PUT requests. GET +retrieves a file or a directory listing from an FTP server. PUT +stores a file on a ftp server. + +You can specify a ftp account for servers that want this in addition +to user name and password. This is specified by including an "Account" +header in the request. + +User name/password can be specified using basic authorization or be +encoded in the URL. Failed logins return an UNAUTHORIZED response with +"WWW-Authenticate: Basic" and can be treated like basic authorization +for HTTP. + +The library supports ftp ASCII transfer mode by specifying the "type=a" +parameter in the URL. It also supports transfer of ranges for FTP transfers +using the "Range" header. + +Directory listings are by default returned unprocessed (as returned +from the ftp server) with the content media type reported to be +"text/ftp-dir-listing". The C<File::Listing> module provides methods +for parsing of these directory listing. + +The ftp module is also able to convert directory listings to HTML and +this can be requested via the standard HTTP content negotiation +mechanisms (add an "Accept: text/html" header in the request if you +want this). + +For normal file retrievals, the "Content-Type" is guessed based on the +file name suffix. See L<LWP::MediaTypes>. + +The "If-Modified-Since" request header works for servers that implement +the MDTM command. It will probably not work for directory listings though. + +Example: + + $req = HTTP::Request->new(GET => 'ftp://me:passwd@ftp.some.where.com/'); + $req->header(Accept => "text/html, */*;q=0.1"); + +=head2 News Requests + +Access to the USENET News system is implemented through the NNTP +protocol. The name of the news server is obtained from the +NNTP_SERVER environment variable and defaults to "news". It is not +possible to specify the hostname of the NNTP server in news: URLs. + +The library supports GET and HEAD to retrieve news articles through the +NNTP protocol. You can also post articles to newsgroups by using +(surprise!) the POST method. + +GET on newsgroups is not implemented yet. + +Examples: + + $req = HTTP::Request->new(GET => 'news:abc1234@a.sn.no'); + + $req = HTTP::Request->new(POST => 'news:comp.lang.perl.test'); + $req->header(Subject => 'This is a test', + From => 'me@some.where.org'); + $req->content(<<EOT); + This is the content of the message that we are sending to + the world. + EOT + + +=head2 Gopher Request + +The library supports the GET and HEAD methods for gopher requests. All +request header values are ignored. HEAD cheats and returns a +response without even talking to server. + +Gopher menus are always converted to HTML. + +The response "Content-Type" is generated from the document type +encoded (as the first letter) in the request URL path itself. + +Example: + + $req = HTTP::Request->new(GET => 'gopher://gopher.sn.no/'); + + + +=head2 File Request + +The library supports GET and HEAD methods for file requests. The +"If-Modified-Since" header is supported. All other headers are +ignored. The I<host> component of the file URL must be empty or set +to "localhost". Any other I<host> value will be treated as an error. + +Directories are always converted to an HTML document. For normal +files, the "Content-Type" and "Content-Encoding" in the response are +guessed based on the file suffix. + +Example: + + $req = HTTP::Request->new(GET => 'file:/etc/passwd'); + + +=head2 Mailto Request + +You can send (aka "POST") mail messages using the library. All +headers specified for the request are passed on to the mail system. +The "To" header is initialized from the mail address in the URL. + +Example: + + $req = HTTP::Request->new(POST => 'mailto:libwww@perl.org'); + $req->header(Subject => "subscribe"); + $req->content("Please subscribe me to the libwww-perl mailing list!\n"); + +=head2 CPAN Requests + +URLs with scheme C<cpan:> are redirected to the a suitable CPAN +mirror. If you have your own local mirror of CPAN you might tell LWP +to use it for C<cpan:> URLs by an assignment like this: + + $LWP::Protocol::cpan::CPAN = "file:/local/CPAN/"; + +Suitable CPAN mirrors are also picked up from the configuration for +the CPAN.pm, so if you have used that module a suitable mirror should +be picked automatically. If neither of these apply, then a redirect +to the generic CPAN http location is issued. + +Example request to download the newest perl: + + $req = HTTP::Request->new(GET => "cpan:src/latest.tar.gz"); + + +=head1 OVERVIEW OF CLASSES AND PACKAGES + +This table should give you a quick overview of the classes provided by the +library. Indentation shows class inheritance. + + LWP::MemberMixin -- Access to member variables of Perl5 classes + LWP::UserAgent -- WWW user agent class + LWP::RobotUA -- When developing a robot applications + LWP::Protocol -- Interface to various protocol schemes + LWP::Protocol::http -- http:// access + LWP::Protocol::file -- file:// access + LWP::Protocol::ftp -- ftp:// access + ... + + LWP::Authen::Basic -- Handle 401 and 407 responses + LWP::Authen::Digest + + HTTP::Headers -- MIME/RFC822 style header (used by HTTP::Message) + HTTP::Message -- HTTP style message + HTTP::Request -- HTTP request + HTTP::Response -- HTTP response + HTTP::Daemon -- A HTTP server class + + WWW::RobotRules -- Parse robots.txt files + WWW::RobotRules::AnyDBM_File -- Persistent RobotRules + + Net::HTTP -- Low level HTTP client + +The following modules provide various functions and definitions. + + LWP -- This file. Library version number and documentation. + LWP::MediaTypes -- MIME types configuration (text/html etc.) + LWP::Simple -- Simplified procedural interface for common functions + HTTP::Status -- HTTP status code (200 OK etc) + HTTP::Date -- Date parsing module for HTTP date formats + HTTP::Negotiate -- HTTP content negotiation calculation + File::Listing -- Parse directory listings + HTML::Form -- Processing for <form>s in HTML documents + + +=head1 MORE DOCUMENTATION + +All modules contain detailed information on the interfaces they +provide. The L<lwpcook> manpage is the libwww-perl cookbook that contain +examples of typical usage of the library. You might want to take a +look at how the scripts L<lwp-request>, L<lwp-download>, L<lwp-dump> +and L<lwp-mirror> are implemented. + +=head1 ENVIRONMENT + +The following environment variables are used by LWP: + +=over + +=item HOME + +The C<LWP::MediaTypes> functions will look for the F<.media.types> and +F<.mime.types> files relative to you home directory. + +=item http_proxy + +=item ftp_proxy + +=item xxx_proxy + +=item no_proxy + +These environment variables can be set to enable communication through +a proxy server. See the description of the C<env_proxy> method in +L<LWP::UserAgent>. + +=item PERL_LWP_ENV_PROXY + +If set to a TRUE value, then the C<LWP::UserAgent> will by default call +C<env_proxy> during initialization. This makes LWP honor the proxy variables +described above. + +=item PERL_LWP_SSL_VERIFY_HOSTNAME + +The default C<verify_hostname> setting for C<LWP::UserAgent>. If +not set the default will be 1. Set it as 0 to disable hostname +verification (the default prior to libwww-perl 5.840. + +=item PERL_LWP_SSL_CA_FILE + +=item PERL_LWP_SSL_CA_PATH + +The file and/or directory +where the trusted Certificate Authority certificates +is located. See L<LWP::UserAgent> for details. + +=item PERL_HTTP_URI_CLASS + +Used to decide what URI objects to instantiate. The default is C<URI>. +You might want to set it to C<URI::URL> for compatibility with old times. + +=back + +=head1 AUTHORS + +LWP was made possible by contributions from Adam Newby, Albert +Dvornik, Alexandre Duret-Lutz, Andreas Gustafsson, Andreas König, +Andrew Pimlott, Andy Lester, Ben Coleman, Benjamin Low, Ben Low, Ben +Tilly, Blair Zajac, Bob Dalgleish, BooK, Brad Hughes, Brian +J. Murrell, Brian McCauley, Charles C. Fu, Charles Lane, Chris Nandor, +Christian Gilmore, Chris W. Unger, Craig Macdonald, Dale Couch, Dan +Kubb, Dave Dunkin, Dave W. Smith, David Coppit, David Dick, David +D. Kilzer, Doug MacEachern, Edward Avis, erik, Gary Shea, Gisle Aas, +Graham Barr, Gurusamy Sarathy, Hans de Graaff, Harald Joerg, Harry +Bochner, Hugo, Ilya Zakharevich, INOUE Yoshinari, Ivan Panchenko, Jack +Shirazi, James Tillman, Jan Dubois, Jared Rhine, Jim Stern, Joao +Lopes, John Klar, Johnny Lee, Josh Kronengold, Josh Rai, Joshua +Chamas, Joshua Hoblitt, Kartik Subbarao, Keiichiro Nagano, Ken +Williams, KONISHI Katsuhiro, Lee T Lindley, Liam Quinn, Marc Hedlund, +Marc Langheinrich, Mark D. Anderson, Marko Asplund, Mark Stosberg, +Markus B Krüger, Markus Laker, Martijn Koster, Martin Thurn, Matthew +Eldridge, Matthew.van.Eerde, Matt Sergeant, Michael A. Chase, Michael +Quaranta, Michael Thompson, Mike Schilli, Moshe Kaminsky, Nathan +Torkington, Nicolai Langfeldt, Norton Allen, Olly Betts, Paul +J. Schinder, peterm, Philip GuentherDaniel Buenzli, Pon Hwa Lin, +Radoslaw Zielinski, Radu Greab, Randal L. Schwartz, Richard Chen, +Robin Barker, Roy Fielding, Sander van Zoest, Sean M. Burke, +shildreth, Slaven Rezic, Steve A Fink, Steve Hay, Steven Butler, +Steve_Kilbane, Takanori Ugai, Thomas Lotterer, Tim Bunce, Tom Hughes, +Tony Finch, Ville Skyttä, Ward Vandewege, William York, Yale Huang, +and Yitzchak Scott-Thoennes. + +LWP owes a lot in motivation, design, and code, to the libwww-perl +library for Perl4 by Roy Fielding, which included work from Alberto +Accomazzi, James Casey, Brooks Cutter, Martijn Koster, Oscar +Nierstrasz, Mel Melchner, Gertjan van Oosten, Jared Rhine, Jack +Shirazi, Gene Spafford, Marc VanHeyningen, Steven E. Brenner, Marion +Hakanson, Waldemar Kebsch, Tony Sanders, and Larry Wall; see the +libwww-perl-0.40 library for details. + +=head1 COPYRIGHT + + Copyright 1995-2009, Gisle Aas + Copyright 1995, Martijn Koster + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 AVAILABILITY + +The latest version of this library is likely to be available from CPAN +as well as: + + http://github.com/libwww-perl/libwww-perl + +The best place to discuss this code is on the <libwww@perl.org> +mailing list. + +=cut diff --git a/lib/LWP/Authen/Basic.pm b/lib/LWP/Authen/Basic.pm new file mode 100644 index 0000000..e7815bd --- /dev/null +++ b/lib/LWP/Authen/Basic.pm @@ -0,0 +1,65 @@ +package LWP::Authen::Basic; +use strict; + +require MIME::Base64; + +sub auth_header { + my($class, $user, $pass) = @_; + return "Basic " . MIME::Base64::encode("$user:$pass", ""); +} + +sub authenticate +{ + my($class, $ua, $proxy, $auth_param, $response, + $request, $arg, $size) = @_; + + my $realm = $auth_param->{realm} || ""; + my $url = $proxy ? $request->{proxy} : $request->uri_canonical; + return $response unless $url; + my $host_port = $url->host_port; + my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization"; + + my @m = $proxy ? (m_proxy => $url) : (m_host_port => $host_port); + push(@m, realm => $realm); + + my $h = $ua->get_my_handler("request_prepare", @m, sub { + $_[0]{callback} = sub { + my($req, $ua, $h) = @_; + my($user, $pass) = $ua->credentials($host_port, $h->{realm}); + if (defined $user) { + my $auth_value = $class->auth_header($user, $pass, $req, $ua, $h); + $req->header($auth_header => $auth_value); + } + }; + }); + $h->{auth_param} = $auth_param; + + if (!$proxy && !$request->header($auth_header) && $ua->credentials($host_port, $realm)) { + # we can make sure this handler applies and retry + add_path($h, $url->path); + return $ua->request($request->clone, $arg, $size, $response); + } + + my($user, $pass) = $ua->get_basic_credentials($realm, $url, $proxy); + unless (defined $user and defined $pass) { + $ua->set_my_handler("request_prepare", undef, @m); # delete handler + return $response; + } + + # check that the password has changed + my ($olduser, $oldpass) = $ua->credentials($host_port, $realm); + return $response if (defined $olduser and defined $oldpass and + $user eq $olduser and $pass eq $oldpass); + + $ua->credentials($host_port, $realm, $user, $pass); + add_path($h, $url->path) unless $proxy; + return $ua->request($request->clone, $arg, $size, $response); +} + +sub add_path { + my($h, $path) = @_; + $path =~ s,[^/]+\z,,; + push(@{$h->{m_path_prefix}}, $path); +} + +1; diff --git a/lib/LWP/Authen/Digest.pm b/lib/LWP/Authen/Digest.pm new file mode 100644 index 0000000..6fe542e --- /dev/null +++ b/lib/LWP/Authen/Digest.pm @@ -0,0 +1,75 @@ +package LWP::Authen::Digest; + +use strict; +use base 'LWP::Authen::Basic'; + +require Digest::MD5; + +sub auth_header { + my($class, $user, $pass, $request, $ua, $h) = @_; + + my $auth_param = $h->{auth_param}; + + my $nc = sprintf "%08X", ++$ua->{authen_md5_nonce_count}{$auth_param->{nonce}}; + my $cnonce = sprintf "%8x", time; + + my $uri = $request->uri->path_query; + $uri = "/" unless length $uri; + + my $md5 = Digest::MD5->new; + + my(@digest); + $md5->add(join(":", $user, $auth_param->{realm}, $pass)); + push(@digest, $md5->hexdigest); + $md5->reset; + + push(@digest, $auth_param->{nonce}); + + if ($auth_param->{qop}) { + push(@digest, $nc, $cnonce, ($auth_param->{qop} =~ m|^auth[,;]auth-int$|) ? 'auth' : $auth_param->{qop}); + } + + $md5->add(join(":", $request->method, $uri)); + push(@digest, $md5->hexdigest); + $md5->reset; + + $md5->add(join(":", @digest)); + my($digest) = $md5->hexdigest; + $md5->reset; + + my %resp = map { $_ => $auth_param->{$_} } qw(realm nonce opaque); + @resp{qw(username uri response algorithm)} = ($user, $uri, $digest, "MD5"); + + if (($auth_param->{qop} || "") =~ m|^auth([,;]auth-int)?$|) { + @resp{qw(qop cnonce nc)} = ("auth", $cnonce, $nc); + } + + my(@order) = qw(username realm qop algorithm uri nonce nc cnonce response); + if($request->method =~ /^(?:POST|PUT)$/) { + $md5->add($request->content); + my $content = $md5->hexdigest; + $md5->reset; + $md5->add(join(":", @digest[0..1], $content)); + $md5->reset; + $resp{"message-digest"} = $md5->hexdigest; + push(@order, "message-digest"); + } + push(@order, "opaque"); + my @pairs; + for (@order) { + next unless defined $resp{$_}; + + # RFC2617 sais that qop-value and nc-value should be unquoted. + if ( $_ eq 'qop' || $_ eq 'nc' ) { + push(@pairs, "$_=" . $resp{$_}); + } + else { + push(@pairs, "$_=" . qq("$resp{$_}")); + } + } + + my $auth_value = "Digest " . join(", ", @pairs); + return $auth_value; +} + +1; diff --git a/lib/LWP/Authen/Ntlm.pm b/lib/LWP/Authen/Ntlm.pm new file mode 100644 index 0000000..2a571b7 --- /dev/null +++ b/lib/LWP/Authen/Ntlm.pm @@ -0,0 +1,180 @@ +package LWP::Authen::Ntlm; + +use strict; +use vars qw/$VERSION/; + +$VERSION = "6.13"; + +use Authen::NTLM "1.02"; +use MIME::Base64 "2.12"; + +sub authenticate { + my($class, $ua, $proxy, $auth_param, $response, + $request, $arg, $size) = @_; + + my($user, $pass) = $ua->get_basic_credentials($auth_param->{realm}, + $request->uri, $proxy); + + unless(defined $user and defined $pass) { + return $response; + } + + if (!$ua->conn_cache()) { + warn "The keep_alive option must be enabled for NTLM authentication to work. NTLM authentication aborted.\n"; + return $response; + } + + my($domain, $username) = split(/\\/, $user); + + ntlm_domain($domain); + ntlm_user($username); + ntlm_password($pass); + + my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization"; + + # my ($challenge) = $response->header('WWW-Authenticate'); + my $challenge; + foreach ($response->header('WWW-Authenticate')) { + last if /^NTLM/ && ($challenge=$_); + } + + if ($challenge eq 'NTLM') { + # First phase, send handshake + my $auth_value = "NTLM " . ntlm(); + ntlm_reset(); + + # Need to check this isn't a repeated fail! + my $r = $response; + my $retry_count = 0; + while ($r) { + my $auth = $r->request->header($auth_header); + ++$retry_count if ($auth && $auth eq $auth_value); + if ($retry_count > 2) { + # here we know this failed before + $response->header("Client-Warning" => + "Credentials for '$user' failed before"); + return $response; + } + $r = $r->previous; + } + + my $referral = $request->clone; + $referral->header($auth_header => $auth_value); + return $ua->request($referral, $arg, $size, $response); + } + + else { + # Second phase, use the response challenge (unless non-401 code + # was returned, in which case, we just send back the response + # object, as is + my $auth_value; + if ($response->code ne '401') { + return $response; + } + else { + my $challenge; + foreach ($response->header('WWW-Authenticate')) { + last if /^NTLM/ && ($challenge=$_); + } + $challenge =~ s/^NTLM //; + ntlm(); + $auth_value = "NTLM " . ntlm($challenge); + ntlm_reset(); + } + + my $referral = $request->clone; + $referral->header($auth_header => $auth_value); + my $response2 = $ua->request($referral, $arg, $size, $response); + return $response2; + } +} + +1; + + +=head1 NAME + +LWP::Authen::Ntlm - Library for enabling NTLM authentication (Microsoft) in LWP + +=head1 SYNOPSIS + + use LWP::UserAgent; + use HTTP::Request::Common; + my $url = 'http://www.company.com/protected_page.html'; + + # Set up the ntlm client and then the base64 encoded ntlm handshake message + my $ua = LWP::UserAgent->new(keep_alive=>1); + $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword'); + + $request = GET $url; + print "--Performing request now...-----------\n"; + $response = $ua->request($request); + print "--Done with request-------------------\n"; + + if ($response->is_success) {print "It worked!->" . $response->code . "\n"} + else {print "It didn't work!->" . $response->code . "\n"} + +=head1 DESCRIPTION + +C<LWP::Authen::Ntlm> allows LWP to authenticate against servers that are using the +NTLM authentication scheme popularized by Microsoft. This type of authentication is +common on intranets of Microsoft-centric organizations. + +The module takes advantage of the Authen::NTLM module by Mark Bush. Since there +is also another Authen::NTLM module available from CPAN by Yee Man Chan with an +entirely different interface, it is necessary to ensure that you have the correct +NTLM module. + +In addition, there have been problems with incompatibilities between different +versions of Mime::Base64, which Bush's Authen::NTLM makes use of. Therefore, it is +necessary to ensure that your Mime::Base64 module supports exporting of the +encode_base64 and decode_base64 functions. + +=head1 USAGE + +The module is used indirectly through LWP, rather than including it directly in your +code. The LWP system will invoke the NTLM authentication when it encounters the +authentication scheme while attempting to retrieve a URL from a server. In order +for the NTLM authentication to work, you must have a few things set up in your +code prior to attempting to retrieve the URL: + +=over 4 + +=item * + +Enable persistent HTTP connections + +To do this, pass the "keep_alive=>1" option to the LWP::UserAgent when creating it, like this: + + my $ua = LWP::UserAgent->new(keep_alive=>1); + +=item * + +Set the credentials on the UserAgent object + +The credentials must be set like this: + + $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword'); + +Note that you cannot use the HTTP::Request object's authorization_basic() method to set +the credentials. Note, too, that the 'www.company.com:80' portion only sets credentials +on the specified port AND it is case-sensitive (this is due to the way LWP is coded, and +has nothing to do with LWP::Authen::Ntlm) + +=back + +=head1 AVAILABILITY + +General queries regarding LWP should be made to the LWP Mailing List. + +Questions specific to LWP::Authen::Ntlm can be forwarded to jtillman@bigfoot.com + +=head1 COPYRIGHT + +Copyright (c) 2002 James Tillman. All rights reserved. This +program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=head1 SEE ALSO + +L<LWP>, L<LWP::UserAgent>, L<lwpcook>. diff --git a/lib/LWP/ConnCache.pm b/lib/LWP/ConnCache.pm new file mode 100644 index 0000000..4969ec5 --- /dev/null +++ b/lib/LWP/ConnCache.pm @@ -0,0 +1,313 @@ +package LWP::ConnCache; + +use strict; +use vars qw($VERSION $DEBUG); + +$VERSION = "6.13"; + + +sub new { + my($class, %cnf) = @_; + + my $total_capacity = 1; + if (exists $cnf{total_capacity}) { + $total_capacity = delete $cnf{total_capacity}; + } + if (%cnf && $^W) { + require Carp; + Carp::carp("Unrecognised options: @{[sort keys %cnf]}") + } + my $self = bless { cc_conns => [] }, $class; + $self->total_capacity($total_capacity); + $self; +} + + +sub deposit { + my($self, $type, $key, $conn) = @_; + push(@{$self->{cc_conns}}, [$conn, $type, $key, time]); + $self->enforce_limits($type); + return; +} + + +sub withdraw { + my($self, $type, $key) = @_; + my $conns = $self->{cc_conns}; + for my $i (0 .. @$conns - 1) { + my $c = $conns->[$i]; + next unless $c->[1] eq $type && $c->[2] eq $key; + splice(@$conns, $i, 1); # remove it + return $c->[0]; + } + return undef; +} + + +sub total_capacity { + my $self = shift; + my $old = $self->{cc_limit_total}; + if (@_) { + $self->{cc_limit_total} = shift; + $self->enforce_limits; + } + $old; +} + + +sub capacity { + my $self = shift; + my $type = shift; + my $old = $self->{cc_limit}{$type}; + if (@_) { + $self->{cc_limit}{$type} = shift; + $self->enforce_limits($type); + } + $old; +} + + +sub enforce_limits { + my($self, $type) = @_; + my $conns = $self->{cc_conns}; + + my @types = $type ? ($type) : ($self->get_types); + for $type (@types) { + next unless $self->{cc_limit}; + my $limit = $self->{cc_limit}{$type}; + next unless defined $limit; + for my $i (reverse 0 .. @$conns - 1) { + next unless $conns->[$i][1] eq $type; + if (--$limit < 0) { + $self->dropping(splice(@$conns, $i, 1), "$type capacity exceeded"); + } + } + } + + if (defined(my $total = $self->{cc_limit_total})) { + while (@$conns > $total) { + $self->dropping(shift(@$conns), "Total capacity exceeded"); + } + } +} + + +sub dropping { + my($self, $c, $reason) = @_; + print "DROPPING @$c [$reason]\n" if $DEBUG; +} + + +sub drop { + my($self, $checker, $reason) = @_; + if (ref($checker) ne "CODE") { + # make it so + if (!defined $checker) { + $checker = sub { 1 }; # drop all of them + } + elsif (_looks_like_number($checker)) { + my $age_limit = $checker; + my $time_limit = time - $age_limit; + $reason ||= "older than $age_limit"; + $checker = sub { $_[3] < $time_limit }; + } + else { + my $type = $checker; + $reason ||= "drop $type"; + $checker = sub { $_[1] eq $type }; # match on type + } + } + $reason ||= "drop"; + + local $SIG{__DIE__}; # don't interfere with eval below + local $@; + my @c; + for (@{$self->{cc_conns}}) { + my $drop; + eval { + if (&$checker(@$_)) { + $self->dropping($_, $reason); + $drop++; + } + }; + push(@c, $_) unless $drop; + } + @{$self->{cc_conns}} = @c; +} + + +sub prune { + my $self = shift; + $self->drop(sub { !shift->ping }, "ping"); +} + + +sub get_types { + my $self = shift; + my %t; + $t{$_->[1]}++ for @{$self->{cc_conns}}; + return keys %t; +} + + +sub get_connections { + my($self, $type) = @_; + my @c; + for (@{$self->{cc_conns}}) { + push(@c, $_->[0]) if !$type || ($type && $type eq $_->[1]); + } + @c; +} + + +sub _looks_like_number { + $_[0] =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/; +} + +1; + + +__END__ + +=head1 NAME + +LWP::ConnCache - Connection cache manager + +=head1 NOTE + +This module is experimental. Details of its interface is likely to +change in the future. + +=head1 SYNOPSIS + + use LWP::ConnCache; + my $cache = LWP::ConnCache->new; + $cache->deposit($type, $key, $sock); + $sock = $cache->withdraw($type, $key); + +=head1 DESCRIPTION + +The C<LWP::ConnCache> class is the standard connection cache manager +for LWP::UserAgent. + +The following basic methods are provided: + +=over + +=item $cache = LWP::ConnCache->new( %options ) + +This method constructs a new C<LWP::ConnCache> object. The only +option currently accepted is 'total_capacity'. If specified it +initialize the total_capacity option. It defaults to the value 1. + +=item $cache->total_capacity( [$num_connections] ) + +Get/sets the number of connection that will be cached. Connections +will start to be dropped when this limit is reached. If set to C<0>, +then all connections are immediately dropped. If set to C<undef>, +then there is no limit. + +=item $cache->capacity($type, [$num_connections] ) + +Get/set a limit for the number of connections of the specified type +that can be cached. The $type will typically be a short string like +"http" or "ftp". + +=item $cache->drop( [$checker, [$reason]] ) + +Drop connections by some criteria. The $checker argument is a +subroutine that is called for each connection. If the routine returns +a TRUE value then the connection is dropped. The routine is called +with ($conn, $type, $key, $deposit_time) as arguments. + +Shortcuts: If the $checker argument is absent (or C<undef>) all cached +connections are dropped. If the $checker is a number then all +connections untouched that the given number of seconds or more are +dropped. If $checker is a string then all connections of the given +type are dropped. + +The $reason argument is passed on to the dropped() method. + +=item $cache->prune + +Calling this method will drop all connections that are dead. This is +tested by calling the ping() method on the connections. If the ping() +method exists and returns a FALSE value, then the connection is +dropped. + +=item $cache->get_types + +This returns all the 'type' fields used for the currently cached +connections. + +=item $cache->get_connections( [$type] ) + +This returns all connection objects of the specified type. If no type +is specified then all connections are returned. In scalar context the +number of cached connections of the specified type is returned. + +=back + + +The following methods are called by low-level protocol modules to +try to save away connections and to get them back. + +=over + +=item $cache->deposit($type, $key, $conn) + +This method adds a new connection to the cache. As a result other +already cached connections might be dropped. Multiple connections with +the same $type/$key might added. + +=item $conn = $cache->withdraw($type, $key) + +This method tries to fetch back a connection that was previously +deposited. If no cached connection with the specified $type/$key is +found, then C<undef> is returned. There is not guarantee that a +deposited connection can be withdrawn, as the cache manger is free to +drop connections at any time. + +=back + +The following methods are called internally. Subclasses might want to +override them. + +=over + +=item $conn->enforce_limits([$type]) + +This method is called with after a new connection is added (deposited) +in the cache or capacity limits are adjusted. The default +implementation drops connections until the specified capacity limits +are not exceeded. + +=item $conn->dropping($conn_record, $reason) + +This method is called when a connection is dropped. The record +belonging to the dropped connection is passed as the first argument +and a string describing the reason for the drop is passed as the +second argument. The default implementation makes some noise if the +$LWP::ConnCache::DEBUG variable is set and nothing more. + +=back + +=head1 SUBCLASSING + +For specialized cache policy it makes sense to subclass +C<LWP::ConnCache> and perhaps override the deposit(), enforce_limits() +and dropping() methods. + +The object itself is a hash. Keys prefixed with C<cc_> are reserved +for the base class. + +=head1 SEE ALSO + +L<LWP::UserAgent> + +=head1 COPYRIGHT + +Copyright 2001 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. diff --git a/lib/LWP/Debug.pm b/lib/LWP/Debug.pm new file mode 100644 index 0000000..99011d5 --- /dev/null +++ b/lib/LWP/Debug.pm @@ -0,0 +1,110 @@ +package LWP::Debug; # legacy + +require Exporter; +@ISA = qw(Exporter); +@EXPORT_OK = qw(level trace debug conns); + +use Carp (); + +my @levels = qw(trace debug conns); +%current_level = (); + + +sub import +{ + my $pack = shift; + my $callpkg = caller(0); + my @symbols = (); + my @levels = (); + for (@_) { + if (/^[-+]/) { + push(@levels, $_); + } + else { + push(@symbols, $_); + } + } + Exporter::export($pack, $callpkg, @symbols); + level(@levels); +} + + +sub level +{ + for (@_) { + if ($_ eq '+') { # all on + # switch on all levels + %current_level = map { $_ => 1 } @levels; + } + elsif ($_ eq '-') { # all off + %current_level = (); + } + elsif (/^([-+])(\w+)$/) { + $current_level{$2} = $1 eq '+'; + } + else { + Carp::croak("Illegal level format $_"); + } + } +} + + +sub trace { _log(@_) if $current_level{'trace'}; } +sub debug { _log(@_) if $current_level{'debug'}; } +sub conns { _log(@_) if $current_level{'conns'}; } + + +sub _log +{ + my $msg = shift; + $msg .= "\n" unless $msg =~ /\n$/; # ensure trailing "\n" + + my($package,$filename,$line,$sub) = caller(2); + print STDERR "$sub: $msg"; +} + +1; + +__END__ + +=head1 NAME + +LWP::Debug - deprecated + +=head1 DESCRIPTION + +LWP::Debug is used to provide tracing facilities, but these are not used +by LWP any more. The code in this module is kept around +(undocumented) so that 3rd party code that happens to use the old +interfaces continue to run. + +One useful feature that LWP::Debug provided (in an imprecise and +troublesome way) was network traffic monitoring. The following +section provides some hints about recommended replacements. + +=head2 Network traffic monitoring + +The best way to monitor the network traffic that LWP generates is to +use an external TCP monitoring program. The Wireshark program +(L<http://www.wireshark.org/>) is highly recommended for this. + +Another approach it to use a debugging HTTP proxy server and make +LWP direct all its traffic via this one. Call C<< $ua->proxy >> to +set it up and then just use LWP as before. + +For less precise monitoring needs just setting up a few simple +handlers might do. The following example sets up handlers to dump the +request and response objects that pass through LWP: + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable()); + + $ua->add_handler("request_send", sub { shift->dump; return }); + $ua->add_handler("response_done", sub { shift->dump; return }); + + $ua->get("http://www.example.com"); + +=head1 SEE ALSO + +L<LWP::UserAgent> diff --git a/lib/LWP/DebugFile.pm b/lib/LWP/DebugFile.pm new file mode 100644 index 0000000..aacdfca --- /dev/null +++ b/lib/LWP/DebugFile.pm @@ -0,0 +1,5 @@ +package LWP::DebugFile; + +# legacy stub + +1; diff --git a/lib/LWP/MemberMixin.pm b/lib/LWP/MemberMixin.pm new file mode 100644 index 0000000..e5ee6f6 --- /dev/null +++ b/lib/LWP/MemberMixin.pm @@ -0,0 +1,44 @@ +package LWP::MemberMixin; + +sub _elem +{ + my $self = shift; + my $elem = shift; + my $old = $self->{$elem}; + $self->{$elem} = shift if @_; + return $old; +} + +1; + +__END__ + +=head1 NAME + +LWP::MemberMixin - Member access mixin class + +=head1 SYNOPSIS + + package Foo; + require LWP::MemberMixin; + @ISA=qw(LWP::MemberMixin); + +=head1 DESCRIPTION + +A mixin class to get methods that provide easy access to member +variables in the %$self. +Ideally there should be better Perl language support for this. + +There is only one method provided: + +=over 4 + +=item _elem($elem [, $val]) + +Internal method to get/set the value of member variable +C<$elem>. If C<$val> is present it is used as the new value +for the member variable. If it is not present the current +value is not touched. In both cases the previous value of +the member variable is returned. + +=back diff --git a/lib/LWP/Protocol.pm b/lib/LWP/Protocol.pm new file mode 100644 index 0000000..8f17d4d --- /dev/null +++ b/lib/LWP/Protocol.pm @@ -0,0 +1,291 @@ +package LWP::Protocol; + +require LWP::MemberMixin; +@ISA = qw(LWP::MemberMixin); +$VERSION = "6.13"; + +use strict; +use Carp (); +use HTTP::Status (); +use HTTP::Response; + +my %ImplementedBy = (); # scheme => classname + + + +sub new +{ + my($class, $scheme, $ua) = @_; + + my $self = bless { + scheme => $scheme, + ua => $ua, + + # historical/redundant + max_size => $ua->{max_size}, + }, $class; + + $self; +} + + +sub create +{ + my($scheme, $ua) = @_; + my $impclass = LWP::Protocol::implementor($scheme) or + Carp::croak("Protocol scheme '$scheme' is not supported"); + + # hand-off to scheme specific implementation sub-class + my $protocol = $impclass->new($scheme, $ua); + + return $protocol; +} + + +sub implementor +{ + my($scheme, $impclass) = @_; + + if ($impclass) { + $ImplementedBy{$scheme} = $impclass; + } + my $ic = $ImplementedBy{$scheme}; + return $ic if $ic; + + return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes + $scheme = $1; # untaint + $scheme =~ s/[.+\-]/_/g; # make it a legal module name + + # scheme not yet known, look for a 'use'd implementation + $ic = "LWP::Protocol::$scheme"; # default location + $ic = "LWP::Protocol::nntp" if $scheme eq 'news'; #XXX ugly hack + no strict 'refs'; + # check we actually have one for the scheme: + unless (@{"${ic}::ISA"}) { + # try to autoload it + eval "require $ic"; + if ($@) { + if ($@ =~ /Can't locate/) { #' #emacs get confused by ' + $ic = ''; + } + else { + die "$@\n"; + } + } + } + $ImplementedBy{$scheme} = $ic if $ic; + $ic; +} + + +sub request +{ + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + Carp::croak('LWP::Protocol::request() needs to be overridden in subclasses'); +} + + +# legacy +sub timeout { shift->_elem('timeout', @_); } +sub max_size { shift->_elem('max_size', @_); } + + +sub collect +{ + my ($self, $arg, $response, $collector) = @_; + my $content; + my($ua, $max_size) = @{$self}{qw(ua max_size)}; + + eval { + local $\; # protect the print below from surprises + if (!defined($arg) || !$response->is_success) { + $response->{default_add_content} = 1; + } + elsif (!ref($arg) && length($arg)) { + open(my $fh, ">", $arg) or die "Can't write to '$arg': $!"; + binmode($fh); + push(@{$response->{handlers}{response_data}}, { + callback => sub { + print $fh $_[3] or die "Can't write to '$arg': $!"; + 1; + }, + }); + push(@{$response->{handlers}{response_done}}, { + callback => sub { + close($fh) or die "Can't write to '$arg': $!"; + undef($fh); + }, + }); + } + elsif (ref($arg) eq 'CODE') { + push(@{$response->{handlers}{response_data}}, { + callback => sub { + &$arg($_[3], $_[0], $self); + 1; + }, + }); + } + else { + die "Unexpected collect argument '$arg'"; + } + + $ua->run_handlers("response_header", $response); + + if (delete $response->{default_add_content}) { + push(@{$response->{handlers}{response_data}}, { + callback => sub { + $_[0]->add_content($_[3]); + 1; + }, + }); + } + + + my $content_size = 0; + my $length = $response->content_length; + my %skip_h; + + while ($content = &$collector, length $$content) { + for my $h ($ua->handlers("response_data", $response)) { + next if $skip_h{$h}; + unless ($h->{callback}->($response, $ua, $h, $$content)) { + # XXX remove from $response->{handlers}{response_data} if present + $skip_h{$h}++; + } + } + $content_size += length($$content); + $ua->progress(($length ? ($content_size / $length) : "tick"), $response); + if (defined($max_size) && $content_size > $max_size) { + $response->push_header("Client-Aborted", "max_size"); + last; + } + } + }; + my $err = $@; + delete $response->{handlers}{response_data}; + delete $response->{handlers} unless %{$response->{handlers}}; + if ($err) { + chomp($err); + $response->push_header('X-Died' => $err); + $response->push_header("Client-Aborted", "die"); + return $response; + } + + return $response; +} + + +sub collect_once +{ + my($self, $arg, $response) = @_; + my $content = \ $_[3]; + my $first = 1; + $self->collect($arg, $response, sub { + return $content if $first--; + return \ ""; + }); +} + +1; + + +__END__ + +=head1 NAME + +LWP::Protocol - Base class for LWP protocols + +=head1 SYNOPSIS + + package LWP::Protocol::foo; + require LWP::Protocol; + @ISA=qw(LWP::Protocol); + +=head1 DESCRIPTION + +This class is used a the base class for all protocol implementations +supported by the LWP library. + +When creating an instance of this class using +C<LWP::Protocol::create($url)>, and you get an initialized subclass +appropriate for that access method. In other words, the +LWP::Protocol::create() function calls the constructor for one of its +subclasses. + +All derived LWP::Protocol classes need to override the request() +method which is used to service a request. The overridden method can +make use of the collect() function to collect together chunks of data +as it is received. + +The following methods and functions are provided: + +=over 4 + +=item $prot = LWP::Protocol->new() + +The LWP::Protocol constructor is inherited by subclasses. As this is a +virtual base class this method should B<not> be called directly. + +=item $prot = LWP::Protocol::create($scheme) + +Create an object of the class implementing the protocol to handle the +given scheme. This is a function, not a method. It is more an object +factory than a constructor. This is the function user agents should +use to access protocols. + +=item $class = LWP::Protocol::implementor($scheme, [$class]) + +Get and/or set implementor class for a scheme. Returns '' if the +specified scheme is not supported. + +=item $prot->request(...) + + $response = $protocol->request($request, $proxy, undef); + $response = $protocol->request($request, $proxy, '/tmp/sss'); + $response = $protocol->request($request, $proxy, \&callback, 1024); + +Dispatches a request over the protocol, and returns a response +object. This method needs to be overridden in subclasses. Refer to +L<LWP::UserAgent> for description of the arguments. + +=item $prot->collect($arg, $response, $collector) + +Called to collect the content of a request, and process it +appropriately into a scalar, file, or by calling a callback. If $arg +is undefined, then the content is stored within the $response. If +$arg is a simple scalar, then $arg is interpreted as a file name and +the content is written to this file. If $arg is a reference to a +routine, then content is passed to this routine. + +The $collector is a routine that will be called and which is +responsible for returning pieces (as ref to scalar) of the content to +process. The $collector signals EOF by returning a reference to an +empty string. + +The return value from collect() is the $response object reference. + +B<Note:> We will only use the callback or file argument if +$response->is_success(). This avoids sending content data for +redirects and authentication responses to the callback which would be +confusing. + +=item $prot->collect_once($arg, $response, $content) + +Can be called when the whole response content is available as +$content. This will invoke collect() with a collector callback that +returns a reference to $content the first time and an empty string the +next. + +=back + +=head1 SEE ALSO + +Inspect the F<LWP/Protocol/file.pm> and F<LWP/Protocol/http.pm> files +for examples of usage. + +=head1 COPYRIGHT + +Copyright 1995-2001 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. diff --git a/lib/LWP/Protocol/GHTTP.pm b/lib/LWP/Protocol/GHTTP.pm new file mode 100644 index 0000000..2a356b5 --- /dev/null +++ b/lib/LWP/Protocol/GHTTP.pm @@ -0,0 +1,73 @@ +package LWP::Protocol::GHTTP; + +# You can tell LWP to use this module for 'http' requests by running +# code like this before you make requests: +# +# require LWP::Protocol::GHTTP; +# LWP::Protocol::implementor('http', 'LWP::Protocol::GHTTP'); +# + +use strict; +use vars qw(@ISA); + +require LWP::Protocol; +@ISA=qw(LWP::Protocol); + +require HTTP::Response; +require HTTP::Status; + +use HTTP::GHTTP qw(METHOD_GET METHOD_HEAD METHOD_POST); + +my %METHOD = +( + GET => METHOD_GET, + HEAD => METHOD_HEAD, + POST => METHOD_POST, +); + +sub request +{ + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + + my $method = $request->method; + unless (exists $METHOD{$method}) { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + "Bad method '$method'"); + } + + my $r = HTTP::GHTTP->new($request->uri); + + # XXX what headers for repeated headers here? + $request->headers->scan(sub { $r->set_header(@_)}); + + $r->set_type($METHOD{$method}); + + # XXX should also deal with subroutine content. + my $cref = $request->content_ref; + $r->set_body($$cref) if length($$cref); + + # XXX is this right + $r->set_proxy($proxy->as_string) if $proxy; + + $r->process_request; + + my $response = HTTP::Response->new($r->get_status); + + # XXX How can get the headers out of $r?? This way is too stupid. + my @headers; + eval { + # Wrapped in eval because this method is not always available + @headers = $r->get_headers; + }; + @headers = qw(Date Connection Server Content-type + Accept-Ranges Server + Content-Length Last-Modified ETag) if $@; + for (@headers) { + my $v = $r->get_header($_); + $response->header($_ => $v) if defined $v; + } + + return $self->collect_once($arg, $response, $r->get_body); +} + +1; diff --git a/lib/LWP/Protocol/cpan.pm b/lib/LWP/Protocol/cpan.pm new file mode 100644 index 0000000..66d8f21 --- /dev/null +++ b/lib/LWP/Protocol/cpan.pm @@ -0,0 +1,72 @@ +package LWP::Protocol::cpan; + +use strict; +use vars qw(@ISA); + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +require URI; +require HTTP::Status; +require HTTP::Response; + +our $CPAN; + +unless ($CPAN) { + # Try to find local CPAN mirror via $CPAN::Config + eval { + require CPAN::Config; + if($CPAN::Config) { + my $urls = $CPAN::Config->{urllist}; + if (ref($urls) eq "ARRAY") { + my $file; + for (@$urls) { + if (/^file:/) { + $file = $_; + last; + } + } + + if ($file) { + $CPAN = $file; + } + else { + $CPAN = $urls->[0]; + } + } + } + }; + + $CPAN ||= "http://cpan.org/"; # last resort +} + +# ensure that we don't chop of last part +$CPAN .= "/" unless $CPAN =~ m,/$,; + + +sub request { + my($self, $request, $proxy, $arg, $size) = @_; + # check proxy + if (defined $proxy) + { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy with cpan'); + } + + # check method + my $method = $request->method; + unless ($method eq 'GET' || $method eq 'HEAD') { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'cpan:' URLs"); + } + + my $path = $request->uri->path; + $path =~ s,^/,,; + + my $response = HTTP::Response->new(&HTTP::Status::RC_FOUND); + $response->header("Location" => URI->new_abs($path, $CPAN)); + $response; +} + +1; diff --git a/lib/LWP/Protocol/data.pm b/lib/LWP/Protocol/data.pm new file mode 100644 index 0000000..c29c3b4 --- /dev/null +++ b/lib/LWP/Protocol/data.pm @@ -0,0 +1,52 @@ +package LWP::Protocol::data; + +# Implements access to data:-URLs as specified in RFC 2397 + +use strict; +use vars qw(@ISA); + +require HTTP::Response; +require HTTP::Status; + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +use HTTP::Date qw(time2str); +require LWP; # needs version number + +sub request +{ + my($self, $request, $proxy, $arg, $size) = @_; + + # check proxy + if (defined $proxy) + { + return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy with data'); + } + + # check method + my $method = $request->method; + unless ($method eq 'GET' || $method eq 'HEAD') { + return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'data:' URLs"); + } + + my $url = $request->uri; + my $response = HTTP::Response->new( &HTTP::Status::RC_OK, "Document follows"); + + my $media_type = $url->media_type; + + my $data = $url->data; + $response->header('Content-Type' => $media_type, + 'Content-Length' => length($data), + 'Date' => time2str(time), + 'Server' => "libwww-perl-internal/$LWP::VERSION" + ); + + $data = "" if $method eq "HEAD"; + return $self->collect_once($arg, $response, $data); +} + +1; diff --git a/lib/LWP/Protocol/file.pm b/lib/LWP/Protocol/file.pm new file mode 100644 index 0000000..f2887f4 --- /dev/null +++ b/lib/LWP/Protocol/file.pm @@ -0,0 +1,146 @@ +package LWP::Protocol::file; + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +use strict; + +require LWP::MediaTypes; +require HTTP::Request; +require HTTP::Response; +require HTTP::Status; +require HTTP::Date; + + +sub request +{ + my($self, $request, $proxy, $arg, $size) = @_; + + $size = 4096 unless defined $size and $size > 0; + + # check proxy + if (defined $proxy) + { + return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy through the filesystem'); + } + + # check method + my $method = $request->method; + unless ($method eq 'GET' || $method eq 'HEAD') { + return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'file:' URLs"); + } + + # check url + my $url = $request->uri; + + my $scheme = $url->scheme; + if ($scheme ne 'file') { + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "LWP::Protocol::file::request called for '$scheme'"); + } + + # URL OK, look at file + my $path = $url->file; + + # test file exists and is readable + unless (-e $path) { + return HTTP::Response->new( &HTTP::Status::RC_NOT_FOUND, + "File `$path' does not exist"); + } + unless (-r _) { + return HTTP::Response->new( &HTTP::Status::RC_FORBIDDEN, + 'User does not have read permission'); + } + + # looks like file exists + my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$filesize, + $atime,$mtime,$ctime,$blksize,$blocks) + = stat(_); + + # XXX should check Accept headers? + + # check if-modified-since + my $ims = $request->header('If-Modified-Since'); + if (defined $ims) { + my $time = HTTP::Date::str2time($ims); + if (defined $time and $time >= $mtime) { + return HTTP::Response->new( &HTTP::Status::RC_NOT_MODIFIED, + "$method $path"); + } + } + + # Ok, should be an OK response by now... + my $response = HTTP::Response->new( &HTTP::Status::RC_OK ); + + # fill in response headers + $response->header('Last-Modified', HTTP::Date::time2str($mtime)); + + if (-d _) { # If the path is a directory, process it + # generate the HTML for directory + opendir(D, $path) or + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "Cannot read directory '$path': $!"); + my(@files) = sort readdir(D); + closedir(D); + + # Make directory listing + require URI::Escape; + require HTML::Entities; + my $pathe = $path . ( $^O eq 'MacOS' ? ':' : '/'); + for (@files) { + my $furl = URI::Escape::uri_escape($_); + if ( -d "$pathe$_" ) { + $furl .= '/'; + $_ .= '/'; + } + my $desc = HTML::Entities::encode($_); + $_ = qq{<LI><A HREF="$furl">$desc</A>}; + } + # Ensure that the base URL is "/" terminated + my $base = $url->clone; + unless ($base->path =~ m|/$|) { + $base->path($base->path . "/"); + } + my $html = join("\n", + "<HTML>\n<HEAD>", + "<TITLE>Directory $path</TITLE>", + "<BASE HREF=\"$base\">", + "</HEAD>\n<BODY>", + "<H1>Directory listing of $path</H1>", + "<UL>", @files, "</UL>", + "</BODY>\n</HTML>\n"); + + $response->header('Content-Type', 'text/html'); + $response->header('Content-Length', length $html); + $html = "" if $method eq "HEAD"; + + return $self->collect_once($arg, $response, $html); + + } + + # path is a regular file + $response->header('Content-Length', $filesize); + LWP::MediaTypes::guess_media_type($path, $response); + + # read the file + if ($method ne "HEAD") { + open(F, $path) or return new + HTTP::Response(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "Cannot read file '$path': $!"); + binmode(F); + $response = $self->collect($arg, $response, sub { + my $content = ""; + my $bytes = sysread(F, $content, $size); + return \$content if $bytes > 0; + return \ ""; + }); + close(F); + } + + $response; +} + +1; diff --git a/lib/LWP/Protocol/ftp.pm b/lib/LWP/Protocol/ftp.pm new file mode 100644 index 0000000..f478c6e --- /dev/null +++ b/lib/LWP/Protocol/ftp.pm @@ -0,0 +1,543 @@ +package LWP::Protocol::ftp; + +# Implementation of the ftp protocol (RFC 959). We let the Net::FTP +# package do all the dirty work. + +use Carp (); + +use HTTP::Status (); +use HTTP::Negotiate (); +use HTTP::Response (); +use LWP::MediaTypes (); +use File::Listing (); + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +use strict; +eval { + package LWP::Protocol::MyFTP; + + require Net::FTP; + Net::FTP->require_version(2.00); + + use vars qw(@ISA); + @ISA=qw(Net::FTP); + + sub new { + my $class = shift; + + my $self = $class->SUPER::new(@_) || return undef; + + my $mess = $self->message; # welcome message + $mess =~ s|\n.*||s; # only first line left + $mess =~ s|\s*ready\.?$||; + # Make the version number more HTTP like + $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||; + ${*$self}{myftp_server} = $mess; + #$response->header("Server", $mess); + + $self; + } + + sub http_server { + my $self = shift; + ${*$self}{myftp_server}; + } + + sub home { + my $self = shift; + my $old = ${*$self}{myftp_home}; + if (@_) { + ${*$self}{myftp_home} = shift; + } + $old; + } + + sub go_home { + my $self = shift; + $self->cwd(${*$self}{myftp_home}); + } + + sub request_count { + my $self = shift; + ++${*$self}{myftp_reqcount}; + } + + sub ping { + my $self = shift; + return $self->go_home; + } + +}; +my $init_failed = $@; + + +sub _connect { + my($self, $host, $port, $user, $account, $password, $timeout) = @_; + + my $key; + my $conn_cache = $self->{ua}{conn_cache}; + if ($conn_cache) { + $key = "$host:$port:$user"; + $key .= ":$account" if defined($account); + if (my $ftp = $conn_cache->withdraw("ftp", $key)) { + if ($ftp->ping) { + # save it again + $conn_cache->deposit("ftp", $key, $ftp); + return $ftp; + } + } + } + + # try to make a connection + my $ftp = LWP::Protocol::MyFTP->new($host, + Port => $port, + Timeout => $timeout, + LocalAddr => $self->{ua}{local_address}, + ); + # XXX Should be some what to pass on 'Passive' (header??) + unless ($ftp) { + $@ =~ s/^Net::FTP: //; + return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@); + } + + unless ($ftp->login($user, $password, $account)) { + # Unauthorized. Let's fake a RC_UNAUTHORIZED response + my $mess = scalar($ftp->message); + $mess =~ s/\n$//; + my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess); + $res->header("Server", $ftp->http_server); + $res->header("WWW-Authenticate", qq(Basic Realm="FTP login")); + return $res; + } + + my $home = $ftp->pwd; + $ftp->home($home); + + $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache; + + return $ftp; +} + + +sub request +{ + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + + $size = 4096 unless $size; + + # check proxy + if (defined $proxy) + { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy through the ftp'); + } + + my $url = $request->uri; + if ($url->scheme ne 'ftp') { + my $scheme = $url->scheme; + return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "LWP::Protocol::ftp::request called for '$scheme'"); + } + + # check method + my $method = $request->method; + + unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'ftp:' URLs"); + } + + if ($init_failed) { + return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, + $init_failed); + } + + my $host = $url->host; + my $port = $url->port; + my $user = $url->user; + my $password = $url->password; + + # If a basic authorization header is present than we prefer these over + # the username/password specified in the URL. + { + my($u,$p) = $request->authorization_basic; + if (defined $u) { + $user = $u; + $password = $p; + } + } + + # We allow the account to be specified in the "Account" header + my $account = $request->header('Account'); + + my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout); + return $ftp if ref($ftp) eq "HTTP::Response"; # ugh! + + # Create an initial response object + my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); + $response->header(Server => $ftp->http_server); + $response->header('Client-Request-Num' => $ftp->request_count); + $response->request($request); + + # Get & fix the path + my @path = grep { length } $url->path_segments; + my $remote_file = pop(@path); + $remote_file = '' unless defined $remote_file; + + my $type; + if (ref $remote_file) { + my @params; + ($remote_file, @params) = @$remote_file; + for (@params) { + $type = $_ if s/^type=//; + } + } + + if ($type && $type eq 'a') { + $ftp->ascii; + } + else { + $ftp->binary; + } + + for (@path) { + unless ($ftp->cwd($_)) { + return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, + "Can't chdir to $_"); + } + } + + if ($method eq 'GET' || $method eq 'HEAD') { + if (my $mod_time = $ftp->mdtm($remote_file)) { + $response->last_modified($mod_time); + if (my $ims = $request->if_modified_since) { + if ($mod_time <= $ims) { + $response->code(&HTTP::Status::RC_NOT_MODIFIED); + $response->message("Not modified"); + return $response; + } + } + } + + # We'll use this later to abort the transfer if necessary. + # if $max_size is defined, we need to abort early. Otherwise, it's + # a normal transfer + my $max_size = undef; + + # Set resume location, if the client requested it + if ($request->header('Range') && $ftp->supported('REST')) + { + my $range_info = $request->header('Range'); + + # Change bytes=2772992-6781209 to just 2772992 + my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/; + if ( defined $start_byte && !defined $end_byte ) { + + # open range -- only the start is specified + + $ftp->restart( $start_byte ); + # don't define $max_size, we don't want to abort early + } + elsif ( defined $start_byte && defined $end_byte && + $start_byte >= 0 && $end_byte >= $start_byte ) { + + $ftp->restart( $start_byte ); + $max_size = $end_byte - $start_byte; + } + else { + + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'Incorrect syntax for Range request'); + } + } + elsif ($request->header('Range') && !$ftp->supported('REST')) + { + return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, + "Server does not support resume."); + } + + my $data; # the data handle + if (length($remote_file) and $data = $ftp->retr($remote_file)) { + my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file); + $response->header('Content-Type', $type) if $type; + for (@enc) { + $response->push_header('Content-Encoding', $_); + } + my $mess = $ftp->message; + if ($mess =~ /\((\d+)\s+bytes\)/) { + $response->header('Content-Length', "$1"); + } + + if ($method ne 'HEAD') { + # Read data from server + $response = $self->collect($arg, $response, sub { + my $content = ''; + my $result = $data->read($content, $size); + + # Stop early if we need to. + if (defined $max_size) + { + # We need an interface to Net::FTP::dataconn for getting + # the number of bytes already read + my $bytes_received = $data->bytes_read(); + + # We were already over the limit. (Should only happen + # once at the end.) + if ($bytes_received - length($content) > $max_size) + { + $content = ''; + } + # We just went over the limit + elsif ($bytes_received > $max_size) + { + # Trim content + $content = substr($content, 0, + $max_size - ($bytes_received - length($content)) ); + } + # We're under the limit + else + { + } + } + + return \$content; + } ); + } + # abort is needed for HEAD, it's == close if the transfer has + # already completed. + unless ($data->abort) { + # Something did not work too well. Note that we treat + # responses to abort() with code 0 in case of HEAD as ok + # (at least wu-ftpd 2.6.1(1) does that). + if ($method ne 'HEAD' || $ftp->code != 0) { + $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR); + $response->message("FTP close response: " . $ftp->code . + " " . $ftp->message); + } + } + } + elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) { + # not a plain file, try to list instead + if (length($remote_file) && !$ftp->cwd($remote_file)) { + return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, + "File '$remote_file' not found"); + } + + # It should now be safe to try to list the directory + my @lsl = $ftp->dir; + + # Try to figure out if the user want us to convert the + # directory listing to HTML. + my @variants = + ( + ['html', 0.60, 'text/html' ], + ['dir', 1.00, 'text/ftp-dir-listing' ] + ); + #$HTTP::Negotiate::DEBUG=1; + my $prefer = HTTP::Negotiate::choose(\@variants, $request); + + my $content = ''; + + if (!defined($prefer)) { + return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE, + "Neither HTML nor directory listing wanted"); + } + elsif ($prefer eq 'html') { + $response->header('Content-Type' => 'text/html'); + $content = "<HEAD><TITLE>File Listing</TITLE>\n"; + my $base = $request->uri->clone; + my $path = $base->path; + $base->path("$path/") unless $path =~ m|/$|; + $content .= qq(<BASE HREF="$base">\n</HEAD>\n); + $content .= "<BODY>\n<UL>\n"; + for (File::Listing::parse_dir(\@lsl, 'GMT')) { + my($name, $type, $size, $mtime, $mode) = @$_; + $content .= qq( <LI> <a href="$name">$name</a>); + $content .= " $size bytes" if $type eq 'f'; + $content .= "\n"; + } + $content .= "</UL></body>\n"; + } + else { + $response->header('Content-Type', 'text/ftp-dir-listing'); + $content = join("\n", @lsl, ''); + } + + $response->header('Content-Length', length($content)); + + if ($method ne 'HEAD') { + $response = $self->collect_once($arg, $response, $content); + } + } + else { + my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + "FTP return code " . $ftp->code); + $res->content_type("text/plain"); + $res->content($ftp->message); + return $res; + } + } + elsif ($method eq 'PUT') { + # method must be PUT + unless (length($remote_file)) { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + "Must have a file name to PUT to"); + } + my $data; + if ($data = $ftp->stor($remote_file)) { + my $content = $request->content; + my $bytes = 0; + if (defined $content) { + if (ref($content) eq 'SCALAR') { + $bytes = $data->write($$content, length($$content)); + } + elsif (ref($content) eq 'CODE') { + my($buf, $n); + while (length($buf = &$content)) { + $n = $data->write($buf, length($buf)); + last unless $n; + $bytes += $n; + } + } + elsif (!ref($content)) { + if (defined $content && length($content)) { + $bytes = $data->write($content, length($content)); + } + } + else { + die "Bad content"; + } + } + $data->close; + + $response->code(&HTTP::Status::RC_CREATED); + $response->header('Content-Type', 'text/plain'); + $response->content("$bytes bytes stored as $remote_file on $host\n") + + } + else { + my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + "FTP return code " . $ftp->code); + $res->content_type("text/plain"); + $res->content($ftp->message); + return $res; + } + } + else { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + "Illegal method $method"); + } + + $response; +} + +1; + +__END__ + +# This is what RFC 1738 has to say about FTP access: +# -------------------------------------------------- +# +# 3.2. FTP +# +# The FTP URL scheme is used to designate files and directories on +# Internet hosts accessible using the FTP protocol (RFC959). +# +# A FTP URL follow the syntax described in Section 3.1. If :<port> is +# omitted, the port defaults to 21. +# +# 3.2.1. FTP Name and Password +# +# A user name and password may be supplied; they are used in the ftp +# "USER" and "PASS" commands after first making the connection to the +# FTP server. If no user name or password is supplied and one is +# requested by the FTP server, the conventions for "anonymous" FTP are +# to be used, as follows: +# +# The user name "anonymous" is supplied. +# +# The password is supplied as the Internet e-mail address +# of the end user accessing the resource. +# +# If the URL supplies a user name but no password, and the remote +# server requests a password, the program interpreting the FTP URL +# should request one from the user. +# +# 3.2.2. FTP url-path +# +# The url-path of a FTP URL has the following syntax: +# +# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode> +# +# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings +# and <typecode> is one of the characters "a", "i", or "d". The part +# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be +# empty. The whole url-path may be omitted, including the "/" +# delimiting it from the prefix containing user, password, host, and +# port. +# +# The url-path is interpreted as a series of FTP commands as follows: +# +# Each of the <cwd> elements is to be supplied, sequentially, as the +# argument to a CWD (change working directory) command. +# +# If the typecode is "d", perform a NLST (name list) command with +# <name> as the argument, and interpret the results as a file +# directory listing. +# +# Otherwise, perform a TYPE command with <typecode> as the argument, +# and then access the file whose name is <name> (for example, using +# the RETR command.) +# +# Within a name or CWD component, the characters "/" and ";" are +# reserved and must be encoded. The components are decoded prior to +# their use in the FTP protocol. In particular, if the appropriate FTP +# sequence to access a particular file requires supplying a string +# containing a "/" as an argument to a CWD or RETR command, it is +# necessary to encode each "/". +# +# For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is +# interpreted by FTP-ing to "host.dom", logging in as "myname" +# (prompting for a password if it is asked for), and then executing +# "CWD /etc" and then "RETR motd". This has a different meaning from +# <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then +# "RETR motd"; the initial "CWD" might be executed relative to the +# default directory for "myname". On the other hand, +# <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null +# argument, then "CWD etc", and then "RETR motd". +# +# FTP URLs may also be used for other operations; for example, it is +# possible to update a file on a remote file server, or infer +# information about it from the directory listings. The mechanism for +# doing so is not spelled out here. +# +# 3.2.3. FTP Typecode is Optional +# +# The entire ;type=<typecode> part of a FTP URL is optional. If it is +# omitted, the client program interpreting the URL must guess the +# appropriate mode to use. In general, the data content type of a file +# can only be guessed from the name, e.g., from the suffix of the name; +# the appropriate type code to be used for transfer of the file can +# then be deduced from the data content of the file. +# +# 3.2.4 Hierarchy +# +# For some file systems, the "/" used to denote the hierarchical +# structure of the URL corresponds to the delimiter used to construct a +# file name hierarchy, and thus, the filename will look similar to the +# URL path. This does NOT mean that the URL is a Unix filename. +# +# 3.2.5. Optimization +# +# Clients accessing resources via FTP may employ additional heuristics +# to optimize the interaction. For some FTP servers, for example, it +# may be reasonable to keep the control connection open while accessing +# multiple URLs from the same server. However, there is no common +# hierarchical model to the FTP protocol, so if a directory change +# command has been given, it is impossible in general to deduce what +# sequence should be given to navigate to another directory for a +# second retrieval, if the paths are different. The only reliable +# algorithm is to disconnect and reestablish the control connection. diff --git a/lib/LWP/Protocol/gopher.pm b/lib/LWP/Protocol/gopher.pm new file mode 100644 index 0000000..db6c0bf --- /dev/null +++ b/lib/LWP/Protocol/gopher.pm @@ -0,0 +1,213 @@ +package LWP::Protocol::gopher; + +# Implementation of the gopher protocol (RFC 1436) +# +# This code is based on 'wwwgopher.pl,v 0.10 1994/10/17 18:12:34 shelden' +# which in turn is a vastly modified version of Oscar's http'get() +# dated 28/3/94 in <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl> +# including contributions from Marc van Heyningen and Martijn Koster. + +use strict; +use vars qw(@ISA); + +require HTTP::Response; +require HTTP::Status; +require IO::Socket; +require IO::Select; + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + + +my %gopher2mimetype = ( + '0' => 'text/plain', # 0 file + '1' => 'text/html', # 1 menu + # 2 CSO phone-book server + # 3 Error + '4' => 'application/mac-binhex40', # 4 BinHexed Macintosh file + '5' => 'application/zip', # 5 DOS binary archive of some sort + '6' => 'application/octet-stream', # 6 UNIX uuencoded file. + '7' => 'text/html', # 7 Index-Search server + # 8 telnet session + '9' => 'application/octet-stream', # 9 binary file + 'h' => 'text/html', # html + 'g' => 'image/gif', # gif + 'I' => 'image/*', # some kind of image +); + +my %gopher2encoding = ( + '6' => 'x_uuencode', # 6 UNIX uuencoded file. +); + +sub request +{ + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + + $size = 4096 unless $size; + + # check proxy + if (defined $proxy) { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy through the gopher'); + } + + my $url = $request->uri; + die "bad scheme" if $url->scheme ne 'gopher'; + + + my $method = $request->method; + unless ($method eq 'GET' || $method eq 'HEAD') { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'gopher:' URLs"); + } + + my $gophertype = $url->gopher_type; + unless (exists $gopher2mimetype{$gophertype}) { + return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, + 'Library does not support gophertype ' . + $gophertype); + } + + my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); + $response->header('Content-type' => $gopher2mimetype{$gophertype} + || 'text/plain'); + $response->header('Content-Encoding' => $gopher2encoding{$gophertype}) + if exists $gopher2encoding{$gophertype}; + + if ($method eq 'HEAD') { + # XXX: don't even try it so we set this header + $response->header('Client-Warning' => 'Client answer only'); + return $response; + } + + if ($gophertype eq '7' && ! $url->search) { + # the url is the prompt for a gopher search; supply boiler-plate + return $self->collect_once($arg, $response, <<"EOT"); +<HEAD> +<TITLE>Gopher Index</TITLE> +<ISINDEX> +</HEAD> +<BODY> +<H1>$url<BR>Gopher Search</H1> +This is a searchable Gopher index. +Use the search function of your browser to enter search terms. +</BODY> +EOT + } + + my $host = $url->host; + my $port = $url->port; + + my $requestLine = ""; + + my $selector = $url->selector; + if (defined $selector) { + $requestLine .= $selector; + my $search = $url->search; + if (defined $search) { + $requestLine .= "\t$search"; + my $string = $url->string; + if (defined $string) { + $requestLine .= "\t$string"; + } + } + } + $requestLine .= "\015\012"; + + # potential request headers are just ignored + + # Ok, lets make the request + my $socket = IO::Socket::INET->new(PeerAddr => $host, + PeerPort => $port, + LocalAddr => $self->{ua}{local_address}, + Proto => 'tcp', + Timeout => $timeout); + die "Can't connect to $host:$port" unless $socket; + my $sel = IO::Select->new($socket); + + { + die "write timeout" if $timeout && !$sel->can_write($timeout); + my $n = syswrite($socket, $requestLine, length($requestLine)); + die $! unless defined($n); + die "short write" if $n != length($requestLine); + } + + my $user_arg = $arg; + + # must handle menus in a special way since they are to be + # converted to HTML. Undefing $arg ensures that the user does + # not see the data before we get a change to convert it. + $arg = undef if $gophertype eq '1' || $gophertype eq '7'; + + # collect response + my $buf = ''; + $response = $self->collect($arg, $response, sub { + die "read timeout" if $timeout && !$sel->can_read($timeout); + my $n = sysread($socket, $buf, $size); + die $! unless defined($n); + return \$buf; + } ); + + # Convert menu to HTML and return data to user. + if ($gophertype eq '1' || $gophertype eq '7') { + my $content = menu2html($response->content); + if (defined $user_arg) { + $response = $self->collect_once($user_arg, $response, $content); + } + else { + $response->content($content); + } + } + + $response; +} + + +sub gopher2url +{ + my($gophertype, $path, $host, $port) = @_; + + my $url; + + if ($gophertype eq '8' || $gophertype eq 'T') { + # telnet session + $url = $HTTP::URI_CLASS->new($gophertype eq '8' ? 'telnet:':'tn3270:'); + $url->user($path) if defined $path; + } + else { + $path = URI::Escape::uri_escape($path); + $url = $HTTP::URI_CLASS->new("gopher:/$gophertype$path"); + } + $url->host($host); + $url->port($port); + $url; +} + +sub menu2html { + my($menu) = @_; + + $menu =~ s/\015//g; # remove carriage return + my $tmp = <<"EOT"; +<HTML> +<HEAD> + <TITLE>Gopher menu</TITLE> +</HEAD> +<BODY> +<H1>Gopher menu</H1> +EOT + for (split("\n", $menu)) { + last if /^\./; + my($pretty, $path, $host, $port) = split("\t"); + + $pretty =~ s/^(.)//; + my $type = $1; + + my $url = gopher2url($type, $path, $host, $port)->as_string; + $tmp .= qq{<A HREF="$url">$pretty</A><BR>\n}; + } + $tmp .= "</BODY>\n</HTML>\n"; + $tmp; +} + +1; diff --git a/lib/LWP/Protocol/http.pm b/lib/LWP/Protocol/http.pm new file mode 100644 index 0000000..23c361f --- /dev/null +++ b/lib/LWP/Protocol/http.pm @@ -0,0 +1,515 @@ +package LWP::Protocol::http; + +use strict; + +require HTTP::Response; +require HTTP::Status; +require Net::HTTP; + +use vars qw(@ISA @EXTRA_SOCK_OPTS); + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +my $CRLF = "\015\012"; + +sub _new_socket +{ + my($self, $host, $port, $timeout) = @_; + + # IPv6 literal IP address should be [bracketed] to remove + # ambiguity between ip address and port number. + if ( ($host =~ /:/) && ($host !~ /^\[/) ) { + $host = "[$host]"; + } + + local($^W) = 0; # IO::Socket::INET can be noisy + my $sock = $self->socket_class->new(PeerAddr => $host, + PeerPort => $port, + LocalAddr => $self->{ua}{local_address}, + Proto => 'tcp', + Timeout => $timeout, + KeepAlive => !!$self->{ua}{conn_cache}, + SendTE => 1, + $self->_extra_sock_opts($host, $port), + ); + + unless ($sock) { + # IO::Socket::INET leaves additional error messages in $@ + my $status = "Can't connect to $host:$port"; + if ($@ =~ /\bconnect: (.*)/ || + $@ =~ /\b(Bad hostname)\b/ || + $@ =~ /\b(certificate verify failed)\b/ || + $@ =~ /\b(Crypt-SSLeay can't verify hostnames)\b/ + ) { + $status .= " ($1)"; + } + die "$status\n\n$@"; + } + + # perl 5.005's IO::Socket does not have the blocking method. + eval { $sock->blocking(0); }; + + $sock; +} + +sub socket_type +{ + return "http"; +} + +sub socket_class +{ + my $self = shift; + (ref($self) || $self) . "::Socket"; +} + +sub _extra_sock_opts # to be overridden by subclass +{ + return @EXTRA_SOCK_OPTS; +} + +sub _check_sock +{ + #my($self, $req, $sock) = @_; +} + +sub _get_sock_info +{ + my($self, $res, $sock) = @_; + if (defined(my $peerhost = $sock->peerhost)) { + $res->header("Client-Peer" => "$peerhost:" . $sock->peerport); + } +} + +sub _fixup_header +{ + my($self, $h, $url, $proxy) = @_; + + # Extract 'Host' header + my $hhost = $url->authority; + if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@" + # add authorization header if we need them. HTTP URLs do + # not really support specification of user and password, but + # we allow it. + if (defined($1) && not $h->header('Authorization')) { + require URI::Escape; + $h->authorization_basic(map URI::Escape::uri_unescape($_), + split(":", $1, 2)); + } + } + $h->init_header('Host' => $hhost); + + if ($proxy && $url->scheme ne 'https') { + # Check the proxy URI's userinfo() for proxy credentials + # export http_proxy="http://proxyuser:proxypass@proxyhost:port". + # For https only the initial CONNECT requests needs authorization. + my $p_auth = $proxy->userinfo(); + if(defined $p_auth) { + require URI::Escape; + $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_), + split(":", $p_auth, 2)) + } + } +} + +sub hlist_remove { + my($hlist, $k) = @_; + $k = lc $k; + for (my $i = @$hlist - 2; $i >= 0; $i -= 2) { + next unless lc($hlist->[$i]) eq $k; + splice(@$hlist, $i, 2); + } +} + +sub request +{ + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + + $size ||= 4096; + + # check method + my $method = $request->method; + unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token + return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'http:' URLs"); + } + + my $url = $request->uri; + + # Proxying SSL with a http proxy needs issues a CONNECT request to build a + # tunnel and then upgrades the tunnel to SSL. But when doing keep-alive the + # https request does not need to be the first request in the connection, so + # we need to distinguish between + # - not yet connected (create socket and ssl upgrade) + # - connected but not inside ssl tunnel (ssl upgrade) + # - inside ssl tunnel to the target - once we are in the tunnel to the + # target we cannot only reuse the tunnel for more https requests with the + # same target + + my $ssl_tunnel = $proxy && $url->scheme eq 'https' + && $url->host.":".$url->port; + + my ($host,$port) = $proxy + ? ($proxy->host,$proxy->port) + : ($url->host,$url->port); + my $fullpath = + $method eq 'CONNECT' ? $url->host . ":" . $url->port : + $proxy && ! $ssl_tunnel ? $url->as_string : + do { + my $path = $url->path_query; + $path = "/$path" if $path !~m{^/}; + $path + }; + + my $socket; + my $conn_cache = $self->{ua}{conn_cache}; + my $cache_key; + if ( $conn_cache ) { + $cache_key = "$host:$port"; + # For https we reuse the socket immediatly only if it has an established + # tunnel to the target. Otherwise a CONNECT request followed by an SSL + # upgrade need to be done first. The request itself might reuse an + # existing non-ssl connection to the proxy + $cache_key .= "!".$ssl_tunnel if $ssl_tunnel; + if ( $socket = $conn_cache->withdraw($self->socket_type,$cache_key)) { + if ($socket->can_read(0)) { + # if the socket is readable, then either the peer has closed the + # connection or there are some garbage bytes on it. In either + # case we abandon it. + $socket->close; + $socket = undef; + } # else use $socket + } + } + + if ( ! $socket && $ssl_tunnel ) { + my $proto_https = LWP::Protocol::create('https',$self->{ua}) + or die "no support for scheme https found"; + + # only if ssl socket class is IO::Socket::SSL we can upgrade + # a plain socket to SSL. In case of Net::SSL we fall back to + # the old version + if ( my $upgrade_sub = $proto_https->can('_upgrade_sock')) { + my $response = $self->request( + HTTP::Request->new('CONNECT',"http://$ssl_tunnel"), + $proxy, + undef,$size,$timeout + ); + $response->is_success or die + "establishing SSL tunnel failed: ".$response->status_line; + $socket = $upgrade_sub->($proto_https, + $response->{client_socket},$url) + or die "SSL upgrade failed: $@"; + } else { + $socket = $proto_https->_new_socket($url->host,$url->port,$timeout); + } + } + + if ( ! $socket ) { + # connect to remote site w/o reusing established socket + $socket = $self->_new_socket($host, $port, $timeout ); + } + + my $http_version = ""; + if (my $proto = $request->protocol) { + if ($proto =~ /^(?:HTTP\/)?(1.\d+)$/) { + $http_version = $1; + $socket->http_version($http_version); + $socket->send_te(0) if $http_version eq "1.0"; + } + } + + $self->_check_sock($request, $socket); + + my @h; + my $request_headers = $request->headers->clone; + $self->_fixup_header($request_headers, $url, $proxy); + + $request_headers->scan(sub { + my($k, $v) = @_; + $k =~ s/^://; + $v =~ s/\n/ /g; + push(@h, $k, $v); + }); + + my $content_ref = $request->content_ref; + $content_ref = $$content_ref if ref($$content_ref); + my $chunked; + my $has_content; + + if (ref($content_ref) eq 'CODE') { + my $clen = $request_headers->header('Content-Length'); + $has_content++ if $clen; + unless (defined $clen) { + push(@h, "Transfer-Encoding" => "chunked"); + $has_content++; + $chunked++; + } + } + else { + # Set (or override) Content-Length header + my $clen = $request_headers->header('Content-Length'); + if (defined($$content_ref) && length($$content_ref)) { + $has_content = length($$content_ref); + if (!defined($clen) || $clen ne $has_content) { + if (defined $clen) { + warn "Content-Length header value was wrong, fixed"; + hlist_remove(\@h, 'Content-Length'); + } + push(@h, 'Content-Length' => $has_content); + } + } + elsif ($clen) { + warn "Content-Length set when there is no content, fixed"; + hlist_remove(\@h, 'Content-Length'); + } + } + + my $write_wait = 0; + $write_wait = 2 + if ($request_headers->header("Expect") || "") =~ /100-continue/; + + my $req_buf = $socket->format_request($method, $fullpath, @h); + #print "------\n$req_buf\n------\n"; + + if (!$has_content || $write_wait || $has_content > 8*1024) { + WRITE: + { + # Since this just writes out the header block it should almost + # always succeed to send the whole buffer in a single write call. + my $n = $socket->syswrite($req_buf, length($req_buf)); + unless (defined $n) { + redo WRITE if $!{EINTR}; + if ($!{EWOULDBLOCK} || $!{EAGAIN}) { + select(undef, undef, undef, 0.1); + redo WRITE; + } + die "write failed: $!"; + } + if ($n) { + substr($req_buf, 0, $n, ""); + } + else { + select(undef, undef, undef, 0.5); + } + redo WRITE if length $req_buf; + } + } + + my($code, $mess, @junk); + my $drop_connection; + + if ($has_content) { + my $eof; + my $wbuf; + my $woffset = 0; + INITIAL_READ: + if ($write_wait) { + # skip filling $wbuf when waiting for 100-continue + # because if the response is a redirect or auth required + # the request will be cloned and there is no way + # to reset the input stream + # return here via the label after the 100-continue is read + } + elsif (ref($content_ref) eq 'CODE') { + my $buf = &$content_ref(); + $buf = "" unless defined($buf); + $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF + if $chunked; + substr($buf, 0, 0) = $req_buf if $req_buf; + $wbuf = \$buf; + } + else { + if ($req_buf) { + my $buf = $req_buf . $$content_ref; + $wbuf = \$buf; + } + else { + $wbuf = $content_ref; + } + $eof = 1; + } + + my $fbits = ''; + vec($fbits, fileno($socket), 1) = 1; + + WRITE: + while ($write_wait || $woffset < length($$wbuf)) { + + my $sel_timeout = $timeout; + if ($write_wait) { + $sel_timeout = $write_wait if $write_wait < $sel_timeout; + } + my $time_before; + $time_before = time if $sel_timeout; + + my $rbits = $fbits; + my $wbits = $write_wait ? undef : $fbits; + my $sel_timeout_before = $sel_timeout; + SELECT: + { + my $nfound = select($rbits, $wbits, undef, $sel_timeout); + if ($nfound < 0) { + if ($!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN}) { + if ($time_before) { + $sel_timeout = $sel_timeout_before - (time - $time_before); + $sel_timeout = 0 if $sel_timeout < 0; + } + redo SELECT; + } + die "select failed: $!"; + } + } + + if ($write_wait) { + $write_wait -= time - $time_before; + $write_wait = 0 if $write_wait < 0; + } + + if (defined($rbits) && $rbits =~ /[^\0]/) { + # readable + my $buf = $socket->_rbuf; + my $n = $socket->sysread($buf, 1024, length($buf)); + unless (defined $n) { + die "read failed: $!" unless $!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN}; + # if we get here the rest of the block will do nothing + # and we will retry the read on the next round + } + elsif ($n == 0) { + # the server closed the connection before we finished + # writing all the request content. No need to write any more. + $drop_connection++; + last WRITE; + } + $socket->_rbuf($buf); + if (!$code && $buf =~ /\015?\012\015?\012/) { + # a whole response header is present, so we can read it without blocking + ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, + junk_out => \@junk, + ); + if ($code eq "100") { + $write_wait = 0; + undef($code); + goto INITIAL_READ; + } + else { + $drop_connection++; + last WRITE; + # XXX should perhaps try to abort write in a nice way too + } + } + } + if (defined($wbits) && $wbits =~ /[^\0]/) { + my $n = $socket->syswrite($$wbuf, length($$wbuf), $woffset); + unless (defined $n) { + die "write failed: $!" unless $!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN}; + $n = 0; # will retry write on the next round + } + elsif ($n == 0) { + die "write failed: no bytes written"; + } + $woffset += $n; + + if (!$eof && $woffset >= length($$wbuf)) { + # need to refill buffer from $content_ref code + my $buf = &$content_ref(); + $buf = "" unless defined($buf); + $eof++ unless length($buf); + $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF + if $chunked; + $wbuf = \$buf; + $woffset = 0; + } + } + } # WRITE + } + + ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk) + unless $code; + ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk) + if $code eq "100"; + + my $response = HTTP::Response->new($code, $mess); + my $peer_http_version = $socket->peer_http_version; + $response->protocol("HTTP/$peer_http_version"); + { + local $HTTP::Headers::TRANSLATE_UNDERSCORE; + $response->push_header(@h); + } + $response->push_header("Client-Junk" => \@junk) if @junk; + + $response->request($request); + $self->_get_sock_info($response, $socket); + + if ($method eq "CONNECT") { + $response->{client_socket} = $socket; # so it can be picked up + return $response; + } + + if (my @te = $response->remove_header('Transfer-Encoding')) { + $response->push_header('Client-Transfer-Encoding', \@te); + } + $response->push_header('Client-Response-Num', scalar $socket->increment_response_count); + + my $complete; + $response = $self->collect($arg, $response, sub { + my $buf = ""; #prevent use of uninitialized value in SSLeay.xs + my $n; + READ: + { + $n = $socket->read_entity_body($buf, $size); + unless (defined $n) { + redo READ if $!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN} || $!{ENOTTY}; + die "read failed: $!"; + } + redo READ if $n == -1; + } + $complete++ if !$n; + return \$buf; + } ); + $drop_connection++ unless $complete; + + @h = $socket->get_trailers; + if (@h) { + local $HTTP::Headers::TRANSLATE_UNDERSCORE; + $response->push_header(@h); + } + + # keep-alive support + unless ($drop_connection) { + if ($cache_key) { + my %connection = map { (lc($_) => 1) } + split(/\s*,\s*/, ($response->header("Connection") || "")); + if (($peer_http_version eq "1.1" && !$connection{close}) || + $connection{"keep-alive"}) + { + $conn_cache->deposit($self->socket_type, $cache_key, $socket); + } + } + } + + $response; +} + + +#----------------------------------------------------------- +package LWP::Protocol::http::SocketMethods; + +sub ping { + my $self = shift; + !$self->can_read(0); +} + +sub increment_response_count { + my $self = shift; + return ++${*$self}{'myhttp_response_count'}; +} + +#----------------------------------------------------------- +package LWP::Protocol::http::Socket; +use vars qw(@ISA); +@ISA = qw(LWP::Protocol::http::SocketMethods Net::HTTP); + +1; diff --git a/lib/LWP/Protocol/loopback.pm b/lib/LWP/Protocol/loopback.pm new file mode 100644 index 0000000..2cd67ae --- /dev/null +++ b/lib/LWP/Protocol/loopback.pm @@ -0,0 +1,26 @@ +package LWP::Protocol::loopback; + +use strict; +use vars qw(@ISA); +require HTTP::Response; + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +sub request { + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + + my $response = HTTP::Response->new(200, "OK"); + $response->content_type("message/http; msgtype=request"); + + $response->header("Via", "loopback/1.0 $proxy") + if $proxy; + + $response->header("X-Arg", $arg); + $response->header("X-Read-Size", $size); + $response->header("X-Timeout", $timeout); + + return $self->collect_once($arg, $response, $request->as_string); +} + +1; diff --git a/lib/LWP/Protocol/mailto.pm b/lib/LWP/Protocol/mailto.pm new file mode 100644 index 0000000..46db716 --- /dev/null +++ b/lib/LWP/Protocol/mailto.pm @@ -0,0 +1,183 @@ +package LWP::Protocol::mailto; + +# This module implements the mailto protocol. It is just a simple +# frontend to the Unix sendmail program except on MacOS, where it uses +# Mail::Internet. + +require LWP::Protocol; +require HTTP::Request; +require HTTP::Response; +require HTTP::Status; + +use Carp; +use strict; +use vars qw(@ISA $SENDMAIL); + +@ISA = qw(LWP::Protocol); + +unless ($SENDMAIL = $ENV{SENDMAIL}) { + for my $sm (qw(/usr/sbin/sendmail + /usr/lib/sendmail + /usr/ucblib/sendmail + )) + { + if (-x $sm) { + $SENDMAIL = $sm; + last; + } + } + die "Can't find the 'sendmail' program" unless $SENDMAIL; +} + +sub request +{ + my($self, $request, $proxy, $arg, $size) = @_; + + my ($mail, $addr) if $^O eq "MacOS"; + my @text = () if $^O eq "MacOS"; + + # check proxy + if (defined $proxy) + { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy with mail'); + } + + # check method + my $method = $request->method; + + if ($method ne 'POST') { + return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for 'mailto:' URLs"); + } + + # check url + my $url = $request->uri; + + my $scheme = $url->scheme; + if ($scheme ne 'mailto') { + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "LWP::Protocol::mailto::request called for '$scheme'"); + } + if ($^O eq "MacOS") { + eval { + require Mail::Internet; + }; + if($@) { + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "You don't have MailTools installed"); + } + unless ($ENV{SMTPHOSTS}) { + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "You don't have SMTPHOSTS defined"); + } + } + else { + unless (-x $SENDMAIL) { + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "You don't have $SENDMAIL"); + } + } + if ($^O eq "MacOS") { + $mail = Mail::Internet->new or + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "Can't get a Mail::Internet object"); + } + else { + open(SENDMAIL, "| $SENDMAIL -oi -t") or + return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "Can't run $SENDMAIL: $!"); + } + if ($^O eq "MacOS") { + $addr = $url->encoded822addr; + } + else { + $request = $request->clone; # we modify a copy + my @h = $url->headers; # URL headers override those in the request + while (@h) { + my $k = shift @h; + my $v = shift @h; + next unless defined $v; + if (lc($k) eq "body") { + $request->content($v); + } + else { + $request->push_header($k => $v); + } + } + } + if ($^O eq "MacOS") { + $mail->add(To => $addr); + $mail->add(split(/[:\n]/,$request->headers_as_string)); + } + else { + print SENDMAIL $request->headers_as_string; + print SENDMAIL "\n"; + } + my $content = $request->content; + if (defined $content) { + my $contRef = ref($content) ? $content : \$content; + if (ref($contRef) eq 'SCALAR') { + if ($^O eq "MacOS") { + @text = split("\n",$$contRef); + foreach (@text) { + $_ .= "\n"; + } + } + else { + print SENDMAIL $$contRef; + } + + } + elsif (ref($contRef) eq 'CODE') { + # Callback provides data + my $d; + if ($^O eq "MacOS") { + my $stuff = ""; + while (length($d = &$contRef)) { + $stuff .= $d; + } + @text = split("\n",$stuff); + foreach (@text) { + $_ .= "\n"; + } + } + else { + print SENDMAIL $d; + } + } + } + if ($^O eq "MacOS") { + $mail->body(\@text); + unless ($mail->smtpsend) { + return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "Mail::Internet->smtpsend unable to send message to <$addr>"); + } + } + else { + unless (close(SENDMAIL)) { + my $err = $! ? "$!" : "Exit status $?"; + return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "$SENDMAIL: $err"); + } + } + + + my $response = HTTP::Response->new(&HTTP::Status::RC_ACCEPTED, + "Mail accepted"); + $response->header('Content-Type', 'text/plain'); + if ($^O eq "MacOS") { + $response->header('Server' => "Mail::Internet $Mail::Internet::VERSION"); + $response->content("Message sent to <$addr>\n"); + } + else { + $response->header('Server' => $SENDMAIL); + my $to = $request->header("To"); + $response->content("Message sent to <$to>\n"); + } + + return $response; +} + +1; diff --git a/lib/LWP/Protocol/nntp.pm b/lib/LWP/Protocol/nntp.pm new file mode 100644 index 0000000..788477d --- /dev/null +++ b/lib/LWP/Protocol/nntp.pm @@ -0,0 +1,145 @@ +package LWP::Protocol::nntp; + +# Implementation of the Network News Transfer Protocol (RFC 977) + +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +require HTTP::Response; +require HTTP::Status; +require Net::NNTP; + +use strict; + + +sub request +{ + my($self, $request, $proxy, $arg, $size, $timeout) = @_; + + $size = 4096 unless $size; + + # Check for proxy + if (defined $proxy) { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'You can not proxy through NNTP'); + } + + # Check that the scheme is as expected + my $url = $request->uri; + my $scheme = $url->scheme; + unless ($scheme eq 'news' || $scheme eq 'nntp') { + return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "LWP::Protocol::nntp::request called for '$scheme'"); + } + + # check for a valid method + my $method = $request->method; + unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'POST') { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + 'Library does not allow method ' . + "$method for '$scheme:' URLs"); + } + + # extract the identifier and check against posting to an article + my $groupart = $url->_group; + my $is_art = $groupart =~ /@/; + + if ($is_art && $method eq 'POST') { + return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, + "Can't post to an article <$groupart>"); + } + + my $nntp = Net::NNTP->new($url->host, + #Port => 18574, + Timeout => $timeout, + #Debug => 1, + ); + die "Can't connect to nntp server" unless $nntp; + + # Check the initial welcome message from the NNTP server + if ($nntp->status != 2) { + return HTTP::Response->new(&HTTP::Status::RC_SERVICE_UNAVAILABLE, + $nntp->message); + } + my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); + + my $mess = $nntp->message; + + # Try to extract server name from greeting message. + # Don't know if this works well for a large class of servers, but + # this works for our server. + $mess =~ s/\s+ready\b.*//; + $mess =~ s/^\S+\s+//; + $response->header(Server => $mess); + + # First we handle posting of articles + if ($method eq 'POST') { + $nntp->quit; $nntp = undef; + $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED); + $response->message("POST not implemented yet"); + return $response; + } + + # The method must be "GET" or "HEAD" by now + if (!$is_art) { + if (!$nntp->group($groupart)) { + $response->code(&HTTP::Status::RC_NOT_FOUND); + $response->message($nntp->message); + } + $nntp->quit; $nntp = undef; + # HEAD: just check if the group exists + if ($method eq 'GET' && $response->is_success) { + $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED); + $response->message("GET newsgroup not implemented yet"); + } + return $response; + } + + # Send command to server to retrieve an article (or just the headers) + my $get = $method eq 'HEAD' ? "head" : "article"; + my $art = $nntp->$get("<$groupart>"); + unless ($art) { + $nntp->quit; $nntp = undef; + $response->code(&HTTP::Status::RC_NOT_FOUND); + $response->message($nntp->message); + return $response; + } + + # Parse headers + my($key, $val); + local $_; + while ($_ = shift @$art) { + if (/^\s+$/) { + last; # end of headers + } + elsif (/^(\S+):\s*(.*)/) { + $response->push_header($key, $val) if $key; + ($key, $val) = ($1, $2); + } + elsif (/^\s+(.*)/) { + next unless $key; + $val .= $1; + } + else { + unshift(@$art, $_); + last; + } + } + $response->push_header($key, $val) if $key; + + # Ensure that there is a Content-Type header + $response->header("Content-Type", "text/plain") + unless $response->header("Content-Type"); + + # Collect the body + $response = $self->collect_once($arg, $response, join("", @$art)) + if @$art; + + # Say goodbye to the server + $nntp->quit; + $nntp = undef; + + $response; +} + +1; diff --git a/lib/LWP/Protocol/nogo.pm b/lib/LWP/Protocol/nogo.pm new file mode 100644 index 0000000..68150a7 --- /dev/null +++ b/lib/LWP/Protocol/nogo.pm @@ -0,0 +1,24 @@ +package LWP::Protocol::nogo; +# If you want to disable access to a particular scheme, use this +# class and then call +# LWP::Protocol::implementor(that_scheme, 'LWP::Protocol::nogo'); +# For then on, attempts to access URLs with that scheme will generate +# a 500 error. + +use strict; +use vars qw(@ISA); +require HTTP::Response; +require HTTP::Status; +require LWP::Protocol; +@ISA = qw(LWP::Protocol); + +sub request { + my($self, $request) = @_; + my $scheme = $request->uri->scheme; + + return HTTP::Response->new( + &HTTP::Status::RC_INTERNAL_SERVER_ERROR, + "Access to \'$scheme\' URIs has been disabled" + ); +} +1; diff --git a/lib/LWP/RobotUA.pm b/lib/LWP/RobotUA.pm new file mode 100644 index 0000000..7b6ab7e --- /dev/null +++ b/lib/LWP/RobotUA.pm @@ -0,0 +1,303 @@ +package LWP::RobotUA; + +require LWP::UserAgent; +@ISA = qw(LWP::UserAgent); +$VERSION = "6.13"; + +require WWW::RobotRules; +require HTTP::Request; +require HTTP::Response; + +use Carp (); +use HTTP::Status (); +use HTTP::Date qw(time2str); +use strict; + + +# +# Additional attributes in addition to those found in LWP::UserAgent: +# +# $self->{'delay'} Required delay between request to the same +# server in minutes. +# +# $self->{'rules'} A WWW::RobotRules object +# + +sub new +{ + my $class = shift; + my %cnf; + if (@_ < 4) { + # legacy args + @cnf{qw(agent from rules)} = @_; + } + else { + %cnf = @_; + } + + Carp::croak('LWP::RobotUA agent required') unless $cnf{agent}; + Carp::croak('LWP::RobotUA from address required') + unless $cnf{from} && $cnf{from} =~ m/\@/; + + my $delay = delete $cnf{delay} || 1; + my $use_sleep = delete $cnf{use_sleep}; + $use_sleep = 1 unless defined($use_sleep); + my $rules = delete $cnf{rules}; + + my $self = LWP::UserAgent->new(%cnf); + $self = bless $self, $class; + + $self->{'delay'} = $delay; # minutes + $self->{'use_sleep'} = $use_sleep; + + if ($rules) { + $rules->agent($cnf{agent}); + $self->{'rules'} = $rules; + } + else { + $self->{'rules'} = WWW::RobotRules->new($cnf{agent}); + } + + $self; +} + + +sub delay { shift->_elem('delay', @_); } +sub use_sleep { shift->_elem('use_sleep', @_); } + + +sub agent +{ + my $self = shift; + my $old = $self->SUPER::agent(@_); + if (@_) { + # Changing our name means to start fresh + $self->{'rules'}->agent($self->{'agent'}); + } + $old; +} + + +sub rules { + my $self = shift; + my $old = $self->_elem('rules', @_); + $self->{'rules'}->agent($self->{'agent'}) if @_; + $old; +} + + +sub no_visits +{ + my($self, $netloc) = @_; + $self->{'rules'}->no_visits($netloc) || 0; +} + +*host_count = \&no_visits; # backwards compatibility with LWP-5.02 + + +sub host_wait +{ + my($self, $netloc) = @_; + return undef unless defined $netloc; + my $last = $self->{'rules'}->last_visit($netloc); + if ($last) { + my $wait = int($self->{'delay'} * 60 - (time - $last)); + $wait = 0 if $wait < 0; + return $wait; + } + return 0; +} + + +sub simple_request +{ + my($self, $request, $arg, $size) = @_; + + # Do we try to access a new server? + my $allowed = $self->{'rules'}->allowed($request->uri); + + if ($allowed < 0) { + # Host is not visited before, or robots.txt expired; fetch "robots.txt" + my $robot_url = $request->uri->clone; + $robot_url->path("robots.txt"); + $robot_url->query(undef); + + # make access to robot.txt legal since this will be a recursive call + $self->{'rules'}->parse($robot_url, ""); + + my $robot_req = HTTP::Request->new('GET', $robot_url); + my $parse_head = $self->parse_head(0); + my $robot_res = $self->request($robot_req); + $self->parse_head($parse_head); + my $fresh_until = $robot_res->fresh_until; + my $content = ""; + if ($robot_res->is_success && $robot_res->content_is_text) { + $content = $robot_res->decoded_content; + $content = "" unless $content && $content =~ /^\s*Disallow\s*:/mi; + } + $self->{'rules'}->parse($robot_url, $content, $fresh_until); + + # recalculate allowed... + $allowed = $self->{'rules'}->allowed($request->uri); + } + + # Check rules + unless ($allowed) { + my $res = HTTP::Response->new( + &HTTP::Status::RC_FORBIDDEN, 'Forbidden by robots.txt'); + $res->request( $request ); # bind it to that request + return $res; + } + + my $netloc = eval { local $SIG{__DIE__}; $request->uri->host_port; }; + my $wait = $self->host_wait($netloc); + + if ($wait) { + if ($self->{'use_sleep'}) { + sleep($wait) + } + else { + my $res = HTTP::Response->new( + &HTTP::Status::RC_SERVICE_UNAVAILABLE, 'Please, slow down'); + $res->header('Retry-After', time2str(time + $wait)); + $res->request( $request ); # bind it to that request + return $res; + } + } + + # Perform the request + my $res = $self->SUPER::simple_request($request, $arg, $size); + + $self->{'rules'}->visit($netloc); + + $res; +} + + +sub as_string +{ + my $self = shift; + my @s; + push(@s, "Robot: $self->{'agent'} operated by $self->{'from'} [$self]"); + push(@s, " Minimum delay: " . int($self->{'delay'}*60) . "s"); + push(@s, " Will sleep if too early") if $self->{'use_sleep'}; + push(@s, " Rules = $self->{'rules'}"); + join("\n", @s, ''); +} + +1; + + +__END__ + +=head1 NAME + +LWP::RobotUA - a class for well-behaved Web robots + +=head1 SYNOPSIS + + use LWP::RobotUA; + my $ua = LWP::RobotUA->new('my-robot/0.1', 'me@foo.com'); + $ua->delay(10); # be very nice -- max one hit every ten minutes! + ... + + # Then just use it just like a normal LWP::UserAgent: + my $response = $ua->get('http://whatever.int/...'); + ... + +=head1 DESCRIPTION + +This class implements a user agent that is suitable for robot +applications. Robots should be nice to the servers they visit. They +should consult the F</robots.txt> file to ensure that they are welcomed +and they should not make requests too frequently. + +But before you consider writing a robot, take a look at +<URL:http://www.robotstxt.org/>. + +When you use an I<LWP::RobotUA> object as your user agent, then you do not +really have to think about these things yourself; C<robots.txt> files +are automatically consulted and obeyed, the server isn't queried +too rapidly, and so on. Just send requests +as you do when you are using a normal I<LWP::UserAgent> +object (using C<< $ua->get(...) >>, C<< $ua->head(...) >>, +C<< $ua->request(...) >>, etc.), and this +special agent will make sure you are nice. + +=head1 METHODS + +The LWP::RobotUA is a sub-class of LWP::UserAgent and implements the +same methods. In addition the following methods are provided: + +=over 4 + +=item $ua = LWP::RobotUA->new( %options ) + +=item $ua = LWP::RobotUA->new( $agent, $from ) + +=item $ua = LWP::RobotUA->new( $agent, $from, $rules ) + +The LWP::UserAgent options C<agent> and C<from> are mandatory. The +options C<delay>, C<use_sleep> and C<rules> initialize attributes +private to the RobotUA. If C<rules> are not provided, then +C<WWW::RobotRules> is instantiated providing an internal database of +F<robots.txt>. + +It is also possible to just pass the value of C<agent>, C<from> and +optionally C<rules> as plain positional arguments. + +=item $ua->delay + +=item $ua->delay( $minutes ) + +Get/set the minimum delay between requests to the same server, in +I<minutes>. The default is 1 minute. Note that this number doesn't +have to be an integer; for example, this sets the delay to 10 seconds: + + $ua->delay(10/60); + +=item $ua->use_sleep + +=item $ua->use_sleep( $boolean ) + +Get/set a value indicating whether the UA should sleep() if requests +arrive too fast, defined as $ua->delay minutes not passed since +last request to the given server. The default is TRUE. If this value is +FALSE then an internal SERVICE_UNAVAILABLE response will be generated. +It will have a Retry-After header that indicates when it is OK to +send another request to this server. + +=item $ua->rules + +=item $ua->rules( $rules ) + +Set/get which I<WWW::RobotRules> object to use. + +=item $ua->no_visits( $netloc ) + +Returns the number of documents fetched from this server host. Yeah I +know, this method should probably have been named num_visits() or +something like that. :-( + +=item $ua->host_wait( $netloc ) + +Returns the number of I<seconds> (from now) you must wait before you can +make a new request to this host. + +=item $ua->as_string + +Returns a string that describes the state of the UA. +Mainly useful for debugging. + +=back + +=head1 SEE ALSO + +L<LWP::UserAgent>, L<WWW::RobotRules> + +=head1 COPYRIGHT + +Copyright 1996-2004 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. diff --git a/lib/LWP/Simple.pm b/lib/LWP/Simple.pm new file mode 100644 index 0000000..aa159c4 --- /dev/null +++ b/lib/LWP/Simple.pm @@ -0,0 +1,253 @@ +package LWP::Simple; + +use strict; +use vars qw($ua %loop_check $FULL_LWP @EXPORT @EXPORT_OK $VERSION); + +require Exporter; + +@EXPORT = qw(get head getprint getstore mirror); +@EXPORT_OK = qw($ua); + +# I really hate this. I was a bad idea to do it in the first place. +# Wonder how to get rid of it??? (It even makes LWP::Simple 7% slower +# for trivial tests) +use HTTP::Status; +push(@EXPORT, @HTTP::Status::EXPORT); + +$VERSION = "6.13"; + +sub import +{ + my $pkg = shift; + my $callpkg = caller; + Exporter::export($pkg, $callpkg, @_); +} + +use LWP::UserAgent (); +use HTTP::Status (); +use HTTP::Date (); +$ua = LWP::UserAgent->new; # we create a global UserAgent object +$ua->agent("LWP::Simple/$VERSION "); +$ua->env_proxy; + + +sub get ($) +{ + my $response = $ua->get(shift); + return $response->decoded_content if $response->is_success; + return undef; +} + + +sub head ($) +{ + my($url) = @_; + my $request = HTTP::Request->new(HEAD => $url); + my $response = $ua->request($request); + + if ($response->is_success) { + return $response unless wantarray; + return (scalar $response->header('Content-Type'), + scalar $response->header('Content-Length'), + HTTP::Date::str2time($response->header('Last-Modified')), + HTTP::Date::str2time($response->header('Expires')), + scalar $response->header('Server'), + ); + } + return; +} + + +sub getprint ($) +{ + my($url) = @_; + my $request = HTTP::Request->new(GET => $url); + local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR + my $callback = sub { print $_[0] }; + if ($^O eq "MacOS") { + $callback = sub { $_[0] =~ s/\015?\012/\n/g; print $_[0] } + } + my $response = $ua->request($request, $callback); + unless ($response->is_success) { + print STDERR $response->status_line, " <URL:$url>\n"; + } + $response->code; +} + + +sub getstore ($$) +{ + my($url, $file) = @_; + my $request = HTTP::Request->new(GET => $url); + my $response = $ua->request($request, $file); + + $response->code; +} + + +sub mirror ($$) +{ + my($url, $file) = @_; + my $response = $ua->mirror($url, $file); + $response->code; +} + + +1; + +__END__ + +=head1 NAME + +LWP::Simple - simple procedural interface to LWP + +=head1 SYNOPSIS + + perl -MLWP::Simple -e 'getprint "http://www.sn.no"' + + use LWP::Simple; + $content = get("http://www.sn.no/"); + die "Couldn't get it!" unless defined $content; + + if (mirror("http://www.sn.no/", "foo") == RC_NOT_MODIFIED) { + ... + } + + if (is_success(getprint("http://www.sn.no/"))) { + ... + } + +=head1 DESCRIPTION + +This module is meant for people who want a simplified view of the +libwww-perl library. It should also be suitable for one-liners. If +you need more control or access to the header fields in the requests +sent and responses received, then you should use the full object-oriented +interface provided by the C<LWP::UserAgent> module. + +The following functions are provided (and exported) by this module: + +=over 3 + +=item get($url) + +The get() function will fetch the document identified by the given URL +and return it. It returns C<undef> if it fails. The $url argument can +be either a string or a reference to a URI object. + +You will not be able to examine the response code or response headers +(like 'Content-Type') when you are accessing the web using this +function. If you need that information you should use the full OO +interface (see L<LWP::UserAgent>). + +=item head($url) + +Get document headers. Returns the following 5 values if successful: +($content_type, $document_length, $modified_time, $expires, $server) + +Returns an empty list if it fails. In scalar context returns TRUE if +successful. + +=item getprint($url) + +Get and print a document identified by a URL. The document is printed +to the selected default filehandle for output (normally STDOUT) as +data is received from the network. If the request fails, then the +status code and message are printed on STDERR. The return value is +the HTTP response code. + +=item getstore($url, $file) + +Gets a document identified by a URL and stores it in the file. The +return value is the HTTP response code. + +=item mirror($url, $file) + +Get and store a document identified by a URL, using +I<If-modified-since>, and checking the I<Content-Length>. Returns +the HTTP response code. + +=back + +This module also exports the HTTP::Status constants and procedures. +You can use them when you check the response code from getprint(), +getstore() or mirror(). The constants are: + + RC_CONTINUE + RC_SWITCHING_PROTOCOLS + RC_OK + RC_CREATED + RC_ACCEPTED + RC_NON_AUTHORITATIVE_INFORMATION + RC_NO_CONTENT + RC_RESET_CONTENT + RC_PARTIAL_CONTENT + RC_MULTIPLE_CHOICES + RC_MOVED_PERMANENTLY + RC_MOVED_TEMPORARILY + RC_SEE_OTHER + RC_NOT_MODIFIED + RC_USE_PROXY + RC_BAD_REQUEST + RC_UNAUTHORIZED + RC_PAYMENT_REQUIRED + RC_FORBIDDEN + RC_NOT_FOUND + RC_METHOD_NOT_ALLOWED + RC_NOT_ACCEPTABLE + RC_PROXY_AUTHENTICATION_REQUIRED + RC_REQUEST_TIMEOUT + RC_CONFLICT + RC_GONE + RC_LENGTH_REQUIRED + RC_PRECONDITION_FAILED + RC_REQUEST_ENTITY_TOO_LARGE + RC_REQUEST_URI_TOO_LARGE + RC_UNSUPPORTED_MEDIA_TYPE + RC_INTERNAL_SERVER_ERROR + RC_NOT_IMPLEMENTED + RC_BAD_GATEWAY + RC_SERVICE_UNAVAILABLE + RC_GATEWAY_TIMEOUT + RC_HTTP_VERSION_NOT_SUPPORTED + +The HTTP::Status classification functions are: + +=over 3 + +=item is_success($rc) + +True if response code indicated a successful request. + +=item is_error($rc) + +True if response code indicated that an error occurred. + +=back + +The module will also export the LWP::UserAgent object as C<$ua> if you +ask for it explicitly. + +The user agent created by this module will identify itself as +"LWP::Simple/#.##" +and will initialize its proxy defaults from the environment (by +calling $ua->env_proxy). + +=head1 CAVEAT + +Note that if you are using both LWP::Simple and the very popular CGI.pm +module, you may be importing a C<head> function from each module, +producing a warning like "Prototype mismatch: sub main::head ($) vs +none". Get around this problem by just not importing LWP::Simple's +C<head> function, like so: + + use LWP::Simple qw(!head); + use CGI qw(:standard); # then only CGI.pm defines a head() + +Then if you do need LWP::Simple's C<head> function, you can just call +it as C<LWP::Simple::head($url)>. + +=head1 SEE ALSO + +L<LWP>, L<lwpcook>, L<LWP::UserAgent>, L<HTTP::Status>, L<lwp-request>, +L<lwp-mirror> diff --git a/lib/LWP/UserAgent.pm b/lib/LWP/UserAgent.pm new file mode 100644 index 0000000..0f0773e --- /dev/null +++ b/lib/LWP/UserAgent.pm @@ -0,0 +1,1861 @@ +package LWP::UserAgent; + +use strict; +use vars qw(@ISA $VERSION); + +require LWP::MemberMixin; +@ISA = qw(LWP::MemberMixin); +$VERSION = "6.13"; + +use HTTP::Request (); +use HTTP::Response (); +use HTTP::Date (); + +use LWP (); +use LWP::Protocol (); + +use Carp (); + + +sub new +{ + # Check for common user mistake + Carp::croak("Options to LWP::UserAgent should be key/value pairs, not hash reference") + if ref($_[1]) eq 'HASH'; + + my($class, %cnf) = @_; + + my $agent = delete $cnf{agent}; + my $from = delete $cnf{from}; + my $def_headers = delete $cnf{default_headers}; + my $timeout = delete $cnf{timeout}; + $timeout = 3*60 unless defined $timeout; + my $local_address = delete $cnf{local_address}; + my $ssl_opts = delete $cnf{ssl_opts} || {}; + unless (exists $ssl_opts->{verify_hostname}) { + # The processing of HTTPS_CA_* below is for compatibility with Crypt::SSLeay + if (exists $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}) { + $ssl_opts->{verify_hostname} = $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}; + } + elsif ($ENV{HTTPS_CA_FILE} || $ENV{HTTPS_CA_DIR}) { + # Crypt-SSLeay compatibility (verify peer certificate; but not the hostname) + $ssl_opts->{verify_hostname} = 0; + $ssl_opts->{SSL_verify_mode} = 1; + } + else { + $ssl_opts->{verify_hostname} = 1; + } + } + unless (exists $ssl_opts->{SSL_ca_file}) { + if (my $ca_file = $ENV{PERL_LWP_SSL_CA_FILE} || $ENV{HTTPS_CA_FILE}) { + $ssl_opts->{SSL_ca_file} = $ca_file; + } + } + unless (exists $ssl_opts->{SSL_ca_path}) { + if (my $ca_path = $ENV{PERL_LWP_SSL_CA_PATH} || $ENV{HTTPS_CA_DIR}) { + $ssl_opts->{SSL_ca_path} = $ca_path; + } + } + my $use_eval = delete $cnf{use_eval}; + $use_eval = 1 unless defined $use_eval; + my $parse_head = delete $cnf{parse_head}; + $parse_head = 1 unless defined $parse_head; + my $show_progress = delete $cnf{show_progress}; + my $max_size = delete $cnf{max_size}; + my $max_redirect = delete $cnf{max_redirect}; + $max_redirect = 7 unless defined $max_redirect; + my $env_proxy = exists $cnf{env_proxy} ? delete $cnf{env_proxy} : $ENV{PERL_LWP_ENV_PROXY}; + + my $cookie_jar = delete $cnf{cookie_jar}; + my $conn_cache = delete $cnf{conn_cache}; + my $keep_alive = delete $cnf{keep_alive}; + + Carp::croak("Can't mix conn_cache and keep_alive") + if $conn_cache && $keep_alive; + + my $protocols_allowed = delete $cnf{protocols_allowed}; + my $protocols_forbidden = delete $cnf{protocols_forbidden}; + + my $requests_redirectable = delete $cnf{requests_redirectable}; + $requests_redirectable = ['GET', 'HEAD'] + unless defined $requests_redirectable; + + # Actually ""s are just as good as 0's, but for concision we'll just say: + Carp::croak("protocols_allowed has to be an arrayref or 0, not \"$protocols_allowed\"!") + if $protocols_allowed and ref($protocols_allowed) ne 'ARRAY'; + Carp::croak("protocols_forbidden has to be an arrayref or 0, not \"$protocols_forbidden\"!") + if $protocols_forbidden and ref($protocols_forbidden) ne 'ARRAY'; + Carp::croak("requests_redirectable has to be an arrayref or 0, not \"$requests_redirectable\"!") + if $requests_redirectable and ref($requests_redirectable) ne 'ARRAY'; + + + if (%cnf && $^W) { + Carp::carp("Unrecognized LWP::UserAgent options: @{[sort keys %cnf]}"); + } + + my $self = bless { + def_headers => $def_headers, + timeout => $timeout, + local_address => $local_address, + ssl_opts => $ssl_opts, + use_eval => $use_eval, + show_progress=> $show_progress, + max_size => $max_size, + max_redirect => $max_redirect, + proxy => {}, + no_proxy => [], + protocols_allowed => $protocols_allowed, + protocols_forbidden => $protocols_forbidden, + requests_redirectable => $requests_redirectable, + }, $class; + + $self->agent(defined($agent) ? $agent : $class->_agent) + if defined($agent) || !$def_headers || !$def_headers->header("User-Agent"); + $self->from($from) if $from; + $self->cookie_jar($cookie_jar) if $cookie_jar; + $self->parse_head($parse_head); + $self->env_proxy if $env_proxy; + + $self->protocols_allowed( $protocols_allowed ) if $protocols_allowed; + $self->protocols_forbidden($protocols_forbidden) if $protocols_forbidden; + + if ($keep_alive) { + $conn_cache ||= { total_capacity => $keep_alive }; + } + $self->conn_cache($conn_cache) if $conn_cache; + + return $self; +} + + +sub send_request +{ + my($self, $request, $arg, $size) = @_; + my($method, $url) = ($request->method, $request->uri); + my $scheme = $url->scheme; + + local($SIG{__DIE__}); # protect against user defined die handlers + + $self->progress("begin", $request); + + my $response = $self->run_handlers("request_send", $request); + + unless ($response) { + my $protocol; + + { + # Honor object-specific restrictions by forcing protocol objects + # into class LWP::Protocol::nogo. + my $x; + if($x = $self->protocols_allowed) { + if (grep lc($_) eq $scheme, @$x) { + } + else { + require LWP::Protocol::nogo; + $protocol = LWP::Protocol::nogo->new; + } + } + elsif ($x = $self->protocols_forbidden) { + if(grep lc($_) eq $scheme, @$x) { + require LWP::Protocol::nogo; + $protocol = LWP::Protocol::nogo->new; + } + } + # else fall thru and create the protocol object normally + } + + # Locate protocol to use + my $proxy = $request->{proxy}; + if ($proxy) { + $scheme = $proxy->scheme; + } + + unless ($protocol) { + $protocol = eval { LWP::Protocol::create($scheme, $self) }; + if ($@) { + $@ =~ s/ at .* line \d+.*//s; # remove file/line number + $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@); + if ($scheme eq "https") { + $response->message($response->message . " (LWP::Protocol::https not installed)"); + $response->content_type("text/plain"); + $response->content(<<EOT); +LWP will support https URLs if the LWP::Protocol::https module +is installed. +EOT + } + } + } + + if (!$response && $self->{use_eval}) { + # we eval, and turn dies into responses below + eval { + $response = $protocol->request($request, $proxy, $arg, $size, $self->{timeout}) || + die "No response returned by $protocol"; + }; + if ($@) { + if (UNIVERSAL::isa($@, "HTTP::Response")) { + $response = $@; + $response->request($request); + } + else { + my $full = $@; + (my $status = $@) =~ s/\n.*//s; + $status =~ s/ at .* line \d+.*//s; # remove file/line number + my $code = ($status =~ s/^(\d\d\d)\s+//) ? $1 : &HTTP::Status::RC_INTERNAL_SERVER_ERROR; + $response = _new_response($request, $code, $status, $full); + } + } + } + elsif (!$response) { + $response = $protocol->request($request, $proxy, + $arg, $size, $self->{timeout}); + # XXX: Should we die unless $response->is_success ??? + } + } + + $response->request($request); # record request for reference + $response->header("Client-Date" => HTTP::Date::time2str(time)); + + $self->run_handlers("response_done", $response); + + $self->progress("end", $response); + return $response; +} + + +sub prepare_request +{ + my($self, $request) = @_; + die "Method missing" unless $request->method; + my $url = $request->uri; + die "URL missing" unless $url; + die "URL must be absolute" unless $url->scheme; + + $self->run_handlers("request_preprepare", $request); + + if (my $def_headers = $self->{def_headers}) { + for my $h ($def_headers->header_field_names) { + $request->init_header($h => [$def_headers->header($h)]); + } + } + + $self->run_handlers("request_prepare", $request); + + return $request; +} + + +sub simple_request +{ + my($self, $request, $arg, $size) = @_; + + # sanity check the request passed in + if (defined $request) { + if (ref $request) { + Carp::croak("You need a request object, not a " . ref($request) . " object") + if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or + !$request->can('method') or !$request->can('uri'); + } + else { + Carp::croak("You need a request object, not '$request'"); + } + } + else { + Carp::croak("No request object passed in"); + } + + eval { + $request = $self->prepare_request($request); + }; + if ($@) { + $@ =~ s/ at .* line \d+.*//s; # remove file/line number + return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, $@); + } + return $self->send_request($request, $arg, $size); +} + + +sub request +{ + my($self, $request, $arg, $size, $previous) = @_; + + my $response = $self->simple_request($request, $arg, $size); + $response->previous($previous) if $previous; + + if ($response->redirects >= $self->{max_redirect}) { + $response->header("Client-Warning" => + "Redirect loop detected (max_redirect = $self->{max_redirect})"); + return $response; + } + + if (my $req = $self->run_handlers("response_redirect", $response)) { + return $self->request($req, $arg, $size, $response); + } + + my $code = $response->code; + + if ($code == &HTTP::Status::RC_MOVED_PERMANENTLY or + $code == &HTTP::Status::RC_FOUND or + $code == &HTTP::Status::RC_SEE_OTHER or + $code == &HTTP::Status::RC_TEMPORARY_REDIRECT) + { + my $referral = $request->clone; + + # These headers should never be forwarded + $referral->remove_header('Host', 'Cookie'); + + if ($referral->header('Referer') && + $request->uri->scheme eq 'https' && + $referral->uri->scheme eq 'http') + { + # RFC 2616, section 15.1.3. + # https -> http redirect, suppressing Referer + $referral->remove_header('Referer'); + } + + if ($code == &HTTP::Status::RC_SEE_OTHER || + $code == &HTTP::Status::RC_FOUND) + { + my $method = uc($referral->method); + unless ($method eq "GET" || $method eq "HEAD") { + $referral->method("GET"); + $referral->content(""); + $referral->remove_content_headers; + } + } + + # And then we update the URL based on the Location:-header. + my $referral_uri = $response->header('Location'); + { + # Some servers erroneously return a relative URL for redirects, + # so make it absolute if it not already is. + local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; + my $base = $response->base; + $referral_uri = "" unless defined $referral_uri; + $referral_uri = $HTTP::URI_CLASS->new($referral_uri, $base) + ->abs($base); + } + $referral->uri($referral_uri); + + return $response unless $self->redirect_ok($referral, $response); + return $self->request($referral, $arg, $size, $response); + + } + elsif ($code == &HTTP::Status::RC_UNAUTHORIZED || + $code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED + ) + { + my $proxy = ($code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED); + my $ch_header = $proxy || $request->method eq 'CONNECT' + ? "Proxy-Authenticate" : "WWW-Authenticate"; + my @challenge = $response->header($ch_header); + unless (@challenge) { + $response->header("Client-Warning" => + "Missing Authenticate header"); + return $response; + } + + require HTTP::Headers::Util; + CHALLENGE: for my $challenge (@challenge) { + $challenge =~ tr/,/;/; # "," is used to separate auth-params!! + ($challenge) = HTTP::Headers::Util::split_header_words($challenge); + my $scheme = shift(@$challenge); + shift(@$challenge); # no value + $challenge = { @$challenge }; # make rest into a hash + + unless ($scheme =~ /^([a-z]+(?:-[a-z]+)*)$/) { + $response->header("Client-Warning" => + "Bad authentication scheme '$scheme'"); + return $response; + } + $scheme = $1; # untainted now + my $class = "LWP::Authen::\u$scheme"; + $class =~ s/-/_/g; + + no strict 'refs'; + unless (%{"$class\::"}) { + # try to load it + eval "require $class"; + if ($@) { + if ($@ =~ /^Can\'t locate/) { + $response->header("Client-Warning" => + "Unsupported authentication scheme '$scheme'"); + } + else { + $response->header("Client-Warning" => $@); + } + next CHALLENGE; + } + } + unless ($class->can("authenticate")) { + $response->header("Client-Warning" => + "Unsupported authentication scheme '$scheme'"); + next CHALLENGE; + } + return $class->authenticate($self, $proxy, $challenge, $response, + $request, $arg, $size); + } + return $response; + } + return $response; +} + + +# +# Now the shortcuts... +# +sub get { + require HTTP::Request::Common; + my($self, @parameters) = @_; + my @suff = $self->_process_colonic_headers(\@parameters,1); + return $self->request( HTTP::Request::Common::GET( @parameters ), @suff ); +} + + +sub post { + require HTTP::Request::Common; + my($self, @parameters) = @_; + my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1)); + return $self->request( HTTP::Request::Common::POST( @parameters ), @suff ); +} + + +sub head { + require HTTP::Request::Common; + my($self, @parameters) = @_; + my @suff = $self->_process_colonic_headers(\@parameters,1); + return $self->request( HTTP::Request::Common::HEAD( @parameters ), @suff ); +} + + +sub put { + require HTTP::Request::Common; + my($self, @parameters) = @_; + my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1)); + return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff ); +} + + +sub delete { + require HTTP::Request::Common; + my($self, @parameters) = @_; + my @suff = $self->_process_colonic_headers(\@parameters,1); + return $self->request( HTTP::Request::Common::DELETE( @parameters ), @suff ); +} + + +sub _process_colonic_headers { + # Process :content_cb / :content_file / :read_size_hint headers. + my($self, $args, $start_index) = @_; + + my($arg, $size); + for(my $i = $start_index; $i < @$args; $i += 2) { + next unless defined $args->[$i]; + + #printf "Considering %s => %s\n", $args->[$i], $args->[$i + 1]; + + if($args->[$i] eq ':content_cb') { + # Some sanity-checking... + $arg = $args->[$i + 1]; + Carp::croak("A :content_cb value can't be undef") unless defined $arg; + Carp::croak("A :content_cb value must be a coderef") + unless ref $arg and UNIVERSAL::isa($arg, 'CODE'); + + } + elsif ($args->[$i] eq ':content_file') { + $arg = $args->[$i + 1]; + + # Some sanity-checking... + Carp::croak("A :content_file value can't be undef") + unless defined $arg; + Carp::croak("A :content_file value can't be a reference") + if ref $arg; + Carp::croak("A :content_file value can't be \"\"") + unless length $arg; + + } + elsif ($args->[$i] eq ':read_size_hint') { + $size = $args->[$i + 1]; + # Bother checking it? + + } + else { + next; + } + splice @$args, $i, 2; + $i -= 2; + } + + # And return a suitable suffix-list for request(REQ,...) + + return unless defined $arg; + return $arg, $size if defined $size; + return $arg; +} + + +sub is_online { + my $self = shift; + return 1 if $self->get("http://www.msftncsi.com/ncsi.txt")->content eq "Microsoft NCSI"; + return 1 if $self->get("http://www.apple.com")->content =~ m,<title>Apple</title>,; + return 0; +} + + +my @ANI = qw(- \ | /); + +sub progress { + my($self, $status, $m) = @_; + return unless $self->{show_progress}; + + local($,, $\); + if ($status eq "begin") { + print STDERR "** ", $m->method, " ", $m->uri, " ==> "; + $self->{progress_start} = time; + $self->{progress_lastp} = ""; + $self->{progress_ani} = 0; + } + elsif ($status eq "end") { + delete $self->{progress_lastp}; + delete $self->{progress_ani}; + print STDERR $m->status_line; + my $t = time - delete $self->{progress_start}; + print STDERR " (${t}s)" if $t; + print STDERR "\n"; + } + elsif ($status eq "tick") { + print STDERR "$ANI[$self->{progress_ani}++]\b"; + $self->{progress_ani} %= @ANI; + } + else { + my $p = sprintf "%3.0f%%", $status * 100; + return if $p eq $self->{progress_lastp}; + print STDERR "$p\b\b\b\b"; + $self->{progress_lastp} = $p; + } + STDERR->flush; +} + + +# +# This whole allow/forbid thing is based on man 1 at's way of doing things. +# +sub is_protocol_supported +{ + my($self, $scheme) = @_; + if (ref $scheme) { + # assume we got a reference to an URI object + $scheme = $scheme->scheme; + } + else { + Carp::croak("Illegal scheme '$scheme' passed to is_protocol_supported") + if $scheme =~ /\W/; + $scheme = lc $scheme; + } + + my $x; + if(ref($self) and $x = $self->protocols_allowed) { + return 0 unless grep lc($_) eq $scheme, @$x; + } + elsif (ref($self) and $x = $self->protocols_forbidden) { + return 0 if grep lc($_) eq $scheme, @$x; + } + + local($SIG{__DIE__}); # protect against user defined die handlers + $x = LWP::Protocol::implementor($scheme); + return 1 if $x and $x ne 'LWP::Protocol::nogo'; + return 0; +} + + +sub protocols_allowed { shift->_elem('protocols_allowed' , @_) } +sub protocols_forbidden { shift->_elem('protocols_forbidden' , @_) } +sub requests_redirectable { shift->_elem('requests_redirectable', @_) } + + +sub redirect_ok +{ + # RFC 2616, section 10.3.2 and 10.3.3 say: + # If the 30[12] status code is received in response to a request other + # than GET or HEAD, the user agent MUST NOT automatically redirect the + # request unless it can be confirmed by the user, since this might + # change the conditions under which the request was issued. + + # Note that this routine used to be just: + # return 0 if $_[1]->method eq "POST"; return 1; + + my($self, $new_request, $response) = @_; + my $method = $response->request->method; + return 0 unless grep $_ eq $method, + @{ $self->requests_redirectable || [] }; + + if ($new_request->uri->scheme eq 'file') { + $response->header("Client-Warning" => + "Can't redirect to a file:// URL!"); + return 0; + } + + # Otherwise it's apparently okay... + return 1; +} + + +sub credentials +{ + my $self = shift; + my $netloc = lc(shift); + my $realm = shift || ""; + my $old = $self->{basic_authentication}{$netloc}{$realm}; + if (@_) { + $self->{basic_authentication}{$netloc}{$realm} = [@_]; + } + return unless $old; + return @$old if wantarray; + return join(":", @$old); +} + + +sub get_basic_credentials +{ + my($self, $realm, $uri, $proxy) = @_; + return if $proxy; + return $self->credentials($uri->host_port, $realm); +} + + +sub timeout { shift->_elem('timeout', @_); } +sub local_address{ shift->_elem('local_address',@_); } +sub max_size { shift->_elem('max_size', @_); } +sub max_redirect { shift->_elem('max_redirect', @_); } +sub show_progress{ shift->_elem('show_progress', @_); } + +sub ssl_opts { + my $self = shift; + if (@_ == 1) { + my $k = shift; + return $self->{ssl_opts}{$k}; + } + if (@_) { + my $old; + while (@_) { + my($k, $v) = splice(@_, 0, 2); + $old = $self->{ssl_opts}{$k} unless @_; + if (defined $v) { + $self->{ssl_opts}{$k} = $v; + } + else { + delete $self->{ssl_opts}{$k}; + } + } + %{$self->{ssl_opts}} = (%{$self->{ssl_opts}}, @_); + return $old; + } + + return keys %{$self->{ssl_opts}}; +} + +sub parse_head { + my $self = shift; + if (@_) { + my $flag = shift; + my $parser; + my $old = $self->set_my_handler("response_header", $flag ? sub { + my($response, $ua) = @_; + require HTML::HeadParser; + $parser = HTML::HeadParser->new; + $parser->xml_mode(1) if $response->content_is_xhtml; + $parser->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40; + + push(@{$response->{handlers}{response_data}}, { + callback => sub { + return unless $parser; + unless ($parser->parse($_[3])) { + my $h = $parser->header; + my $r = $_[0]; + for my $f ($h->header_field_names) { + $r->init_header($f, [$h->header($f)]); + } + undef($parser); + } + }, + }); + + } : undef, + m_media_type => "html", + ); + return !!$old; + } + else { + return !!$self->get_my_handler("response_header"); + } +} + +sub cookie_jar { + my $self = shift; + my $old = $self->{cookie_jar}; + if (@_) { + my $jar = shift; + if (ref($jar) eq "HASH") { + require HTTP::Cookies; + $jar = HTTP::Cookies->new(%$jar); + } + $self->{cookie_jar} = $jar; + $self->set_my_handler("request_prepare", + $jar ? sub { $jar->add_cookie_header($_[0]); } : undef, + ); + $self->set_my_handler("response_done", + $jar ? sub { $jar->extract_cookies($_[0]); } : undef, + ); + } + $old; +} + +sub default_headers { + my $self = shift; + my $old = $self->{def_headers} ||= HTTP::Headers->new; + if (@_) { + Carp::croak("default_headers not set to HTTP::Headers compatible object") + unless @_ == 1 && $_[0]->can("header_field_names"); + $self->{def_headers} = shift; + } + return $old; +} + +sub default_header { + my $self = shift; + return $self->default_headers->header(@_); +} + +sub _agent { "libwww-perl/$LWP::VERSION" } + +sub agent { + my $self = shift; + if (@_) { + my $agent = shift; + if ($agent) { + $agent .= $self->_agent if $agent =~ /\s+$/; + } + else { + undef($agent) + } + return $self->default_header("User-Agent", $agent); + } + return $self->default_header("User-Agent"); +} + +sub from { # legacy + my $self = shift; + return $self->default_header("From", @_); +} + + +sub conn_cache { + my $self = shift; + my $old = $self->{conn_cache}; + if (@_) { + my $cache = shift; + if (ref($cache) eq "HASH") { + require LWP::ConnCache; + $cache = LWP::ConnCache->new(%$cache); + } + $self->{conn_cache} = $cache; + } + $old; +} + + +sub add_handler { + my($self, $phase, $cb, %spec) = @_; + $spec{line} ||= join(":", (caller)[1,2]); + my $conf = $self->{handlers}{$phase} ||= do { + require HTTP::Config; + HTTP::Config->new; + }; + $conf->add(%spec, callback => $cb); +} + +sub set_my_handler { + my($self, $phase, $cb, %spec) = @_; + $spec{owner} = (caller(1))[3] unless exists $spec{owner}; + $self->remove_handler($phase, %spec); + $spec{line} ||= join(":", (caller)[1,2]); + $self->add_handler($phase, $cb, %spec) if $cb; +} + +sub get_my_handler { + my $self = shift; + my $phase = shift; + my $init = pop if @_ % 2; + my %spec = @_; + my $conf = $self->{handlers}{$phase}; + unless ($conf) { + return unless $init; + require HTTP::Config; + $conf = $self->{handlers}{$phase} = HTTP::Config->new; + } + $spec{owner} = (caller(1))[3] unless exists $spec{owner}; + my @h = $conf->find(%spec); + if (!@h && $init) { + if (ref($init) eq "CODE") { + $init->(\%spec); + } + elsif (ref($init) eq "HASH") { + while (my($k, $v) = each %$init) { + $spec{$k} = $v; + } + } + $spec{callback} ||= sub {}; + $spec{line} ||= join(":", (caller)[1,2]); + $conf->add(\%spec); + return \%spec; + } + return wantarray ? @h : $h[0]; +} + +sub remove_handler { + my($self, $phase, %spec) = @_; + if ($phase) { + my $conf = $self->{handlers}{$phase} || return; + my @h = $conf->remove(%spec); + delete $self->{handlers}{$phase} if $conf->empty; + return @h; + } + + return unless $self->{handlers}; + return map $self->remove_handler($_), sort keys %{$self->{handlers}}; +} + +sub handlers { + my($self, $phase, $o) = @_; + my @h; + if ($o->{handlers} && $o->{handlers}{$phase}) { + push(@h, @{$o->{handlers}{$phase}}); + } + if (my $conf = $self->{handlers}{$phase}) { + push(@h, $conf->matching($o)); + } + return @h; +} + +sub run_handlers { + my($self, $phase, $o) = @_; + if (defined(wantarray)) { + for my $h ($self->handlers($phase, $o)) { + my $ret = $h->{callback}->($o, $self, $h); + return $ret if $ret; + } + return undef; + } + + for my $h ($self->handlers($phase, $o)) { + $h->{callback}->($o, $self, $h); + } +} + + +# deprecated +sub use_eval { shift->_elem('use_eval', @_); } +sub use_alarm +{ + Carp::carp("LWP::UserAgent->use_alarm(BOOL) is a no-op") + if @_ > 1 && $^W; + ""; +} + + +sub clone +{ + my $self = shift; + my $copy = bless { %$self }, ref $self; # copy most fields + + delete $copy->{handlers}; + delete $copy->{conn_cache}; + + # copy any plain arrays and hashes; known not to need recursive copy + for my $k (qw(proxy no_proxy requests_redirectable ssl_opts)) { + next unless $copy->{$k}; + if (ref($copy->{$k}) eq "ARRAY") { + $copy->{$k} = [ @{$copy->{$k}} ]; + } + elsif (ref($copy->{$k}) eq "HASH") { + $copy->{$k} = { %{$copy->{$k}} }; + } + } + + if ($self->{def_headers}) { + $copy->{def_headers} = $self->{def_headers}->clone; + } + + # re-enable standard handlers + $copy->parse_head($self->parse_head); + + # no easy way to clone the cookie jar; so let's just remove it for now + $copy->cookie_jar(undef); + + $copy; +} + + +sub mirror +{ + my($self, $url, $file) = @_; + + my $request = HTTP::Request->new('GET', $url); + + # If the file exists, add a cache-related header + if ( -e $file ) { + my ($mtime) = ( stat($file) )[9]; + if ($mtime) { + $request->header( 'If-Modified-Since' => HTTP::Date::time2str($mtime) ); + } + } + my $tmpfile = "$file-$$"; + + my $response = $self->request($request, $tmpfile); + if ( $response->header('X-Died') ) { + die $response->header('X-Died'); + } + + # Only fetching a fresh copy of the would be considered success. + # If the file was not modified, "304" would returned, which + # is considered by HTTP::Status to be a "redirect", /not/ "success" + if ( $response->is_success ) { + my @stat = stat($tmpfile) or die "Could not stat tmpfile '$tmpfile': $!"; + my $file_length = $stat[7]; + my ($content_length) = $response->header('Content-length'); + + if ( defined $content_length and $file_length < $content_length ) { + unlink($tmpfile); + die "Transfer truncated: " . "only $file_length out of $content_length bytes received\n"; + } + elsif ( defined $content_length and $file_length > $content_length ) { + unlink($tmpfile); + die "Content-length mismatch: " . "expected $content_length bytes, got $file_length\n"; + } + # The file was the expected length. + else { + # Replace the stale file with a fresh copy + if ( -e $file ) { + # Some DOSish systems fail to rename if the target exists + chmod 0777, $file; + unlink $file; + } + rename( $tmpfile, $file ) + or die "Cannot rename '$tmpfile' to '$file': $!\n"; + + # make sure the file has the same last modification time + if ( my $lm = $response->last_modified ) { + utime $lm, $lm, $file; + } + } + } + # The local copy is fresh enough, so just delete the temp file + else { + unlink($tmpfile); + } + return $response; +} + + +sub _need_proxy { + my($req, $ua) = @_; + return if exists $req->{proxy}; + my $proxy = $ua->{proxy}{$req->uri->scheme} || return; + if ($ua->{no_proxy}) { + if (my $host = eval { $req->uri->host }) { + for my $domain (@{$ua->{no_proxy}}) { + if ($host =~ /\Q$domain\E$/) { + return; + } + } + } + } + $req->{proxy} = $HTTP::URI_CLASS->new($proxy); +} + + +sub proxy +{ + my $self = shift; + my $key = shift; + return map $self->proxy($_, @_), @$key if ref $key; + + Carp::croak("'$key' is not a valid URI scheme") unless $key =~ /^$URI::scheme_re\z/; + my $old = $self->{'proxy'}{$key}; + if (@_) { + my $url = shift; + if (defined($url) && length($url)) { + Carp::croak("Proxy must be specified as absolute URI; '$url' is not") unless $url =~ /^$URI::scheme_re:/; + Carp::croak("Bad http proxy specification '$url'") if $url =~ /^https?:/ && $url !~ m,^https?://\w,; + } + $self->{proxy}{$key} = $url; + $self->set_my_handler("request_preprepare", \&_need_proxy) + } + return $old; +} + + +sub env_proxy { + my ($self) = @_; + require Encode; + require Encode::Locale; + my($k,$v); + while(($k, $v) = each %ENV) { + if ($ENV{REQUEST_METHOD}) { + # Need to be careful when called in the CGI environment, as + # the HTTP_PROXY variable is under control of that other guy. + next if $k =~ /^HTTP_/; + $k = "HTTP_PROXY" if $k eq "CGI_HTTP_PROXY"; + } + $k = lc($k); + next unless $k =~ /^(.*)_proxy$/; + $k = $1; + if ($k eq 'no') { + $self->no_proxy(split(/\s*,\s*/, $v)); + } + else { + # Ignore random _proxy variables, allow only valid schemes + next unless $k =~ /^$URI::scheme_re\z/; + # Ignore xxx_proxy variables if xxx isn't a supported protocol + next unless LWP::Protocol::implementor($k); + $self->proxy($k, Encode::decode(locale => $v)); + } + } +} + + +sub no_proxy { + my($self, @no) = @_; + if (@no) { + push(@{ $self->{'no_proxy'} }, @no); + } + else { + $self->{'no_proxy'} = []; + } +} + + +sub _new_response { + my($request, $code, $message, $content) = @_; + $message ||= HTTP::Status::status_message($code); + my $response = HTTP::Response->new($code, $message); + $response->request($request); + $response->header("Client-Date" => HTTP::Date::time2str(time)); + $response->header("Client-Warning" => "Internal response"); + $response->header("Content-Type" => "text/plain"); + $response->content($content || "$code $message\n"); + return $response; +} + + +1; + +__END__ + +=head1 NAME + +LWP::UserAgent - Web user agent class + +=head1 SYNOPSIS + + require LWP::UserAgent; + + my $ua = LWP::UserAgent->new; + $ua->timeout(10); + $ua->env_proxy; + + my $response = $ua->get('http://search.cpan.org/'); + + if ($response->is_success) { + print $response->decoded_content; # or whatever + } + else { + die $response->status_line; + } + +=head1 DESCRIPTION + +The C<LWP::UserAgent> is a class implementing a web user agent. +C<LWP::UserAgent> objects can be used to dispatch web requests. + +In normal use the application creates an C<LWP::UserAgent> object, and +then configures it with values for timeouts, proxies, name, etc. It +then creates an instance of C<HTTP::Request> for the request that +needs to be performed. This request is then passed to one of the +request method the UserAgent, which dispatches it using the relevant +protocol, and returns a C<HTTP::Response> object. There are +convenience methods for sending the most common request types: get(), +head(), post(), put() and delete(). When using these methods then the +creation of the request object is hidden as shown in the synopsis above. + +The basic approach of the library is to use HTTP style communication +for all protocol schemes. This means that you will construct +C<HTTP::Request> objects and receive C<HTTP::Response> objects even +for non-HTTP resources like I<gopher> and I<ftp>. In order to achieve +even more similarity to HTTP style communications, gopher menus and +file directories are converted to HTML documents. + +=head1 CONSTRUCTOR METHODS + +The following constructor methods are available: + +=over 4 + +=item $ua = LWP::UserAgent->new( %options ) + +This method constructs a new C<LWP::UserAgent> object and returns it. +Key/value pair arguments may be provided to set up the initial state. +The following options correspond to attribute methods described below: + + KEY DEFAULT + ----------- -------------------- + agent "libwww-perl/#.###" + from undef + conn_cache undef + cookie_jar undef + default_headers HTTP::Headers->new + local_address undef + ssl_opts { verify_hostname => 1 } + max_size undef + max_redirect 7 + parse_head 1 + protocols_allowed undef + protocols_forbidden undef + requests_redirectable ['GET', 'HEAD'] + timeout 180 + +The following additional options are also accepted: If the C<env_proxy> option +is passed in with a TRUE value, then proxy settings are read from environment +variables (see env_proxy() method below). If C<env_proxy> isn't provided the +C<PERL_LWP_ENV_PROXY> environment variable controls if env_proxy() is called +during initialization. If the C<keep_alive> option is passed in, then a +C<LWP::ConnCache> is set up (see conn_cache() method below). The C<keep_alive> +value is passed on as the C<total_capacity> for the connection cache. + +=item $ua->clone + +Returns a copy of the LWP::UserAgent object. + +=back + +=head1 ATTRIBUTES + +The settings of the configuration attributes modify the behaviour of the +C<LWP::UserAgent> when it dispatches requests. Most of these can also +be initialized by options passed to the constructor method. + +The following attribute methods are provided. The attribute value is +left unchanged if no argument is given. The return value from each +method is the old attribute value. + +=over + +=item $ua->agent + +=item $ua->agent( $product_id ) + +Get/set the product token that is used to identify the user agent on +the network. The agent value is sent as the "User-Agent" header in +the requests. The default is the string returned by the _agent() +method (see below). + +If the $product_id ends with space then the _agent() string is +appended to it. + +The user agent string should be one or more simple product identifiers +with an optional version number separated by the "/" character. +Examples are: + + $ua->agent('Checkbot/0.4 ' . $ua->_agent); + $ua->agent('Checkbot/0.4 '); # same as above + $ua->agent('Mozilla/5.0'); + $ua->agent(""); # don't identify + +=item $ua->_agent + +Returns the default agent identifier. This is a string of the form +"libwww-perl/#.###", where "#.###" is substituted with the version number +of this library. + +=item $ua->from + +=item $ua->from( $email_address ) + +Get/set the e-mail address for the human user who controls +the requesting user agent. The address should be machine-usable, as +defined in RFC 822. The C<from> value is send as the "From" header in +the requests. Example: + + $ua->from('gaas@cpan.org'); + +The default is to not send a "From" header. See the default_headers() +method for the more general interface that allow any header to be defaulted. + +=item $ua->cookie_jar + +=item $ua->cookie_jar( $cookie_jar_obj ) + +Get/set the cookie jar object to use. The only requirement is that +the cookie jar object must implement the extract_cookies($response) and +add_cookie_header($request) methods. These methods will then be +invoked by the user agent as requests are sent and responses are +received. Normally this will be a C<HTTP::Cookies> object or some +subclass. + +The default is to have no cookie_jar, i.e. never automatically add +"Cookie" headers to the requests. + +Shortcut: If a reference to a plain hash is passed in as the +$cookie_jar_object, then it is replaced with an instance of +C<HTTP::Cookies> that is initialized based on the hash. This form also +automatically loads the C<HTTP::Cookies> module. It means that: + + $ua->cookie_jar({ file => "$ENV{HOME}/.cookies.txt" }); + +is really just a shortcut for: + + require HTTP::Cookies; + $ua->cookie_jar(HTTP::Cookies->new(file => "$ENV{HOME}/.cookies.txt")); + +=item $ua->default_headers + +=item $ua->default_headers( $headers_obj ) + +Get/set the headers object that will provide default header values for +any requests sent. By default this will be an empty C<HTTP::Headers> +object. + +=item $ua->default_header( $field ) + +=item $ua->default_header( $field => $value ) + +This is just a short-cut for $ua->default_headers->header( $field => +$value ). Example: + + $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable()); + $ua->default_header('Accept-Language' => "no, en"); + +=item $ua->conn_cache + +=item $ua->conn_cache( $cache_obj ) + +Get/set the C<LWP::ConnCache> object to use. See L<LWP::ConnCache> +for details. + +=item $ua->credentials( $netloc, $realm ) + +=item $ua->credentials( $netloc, $realm, $uname, $pass ) + +Get/set the user name and password to be used for a realm. + +The $netloc is a string of the form "<host>:<port>". The username and +password will only be passed to this server. Example: + + $ua->credentials("www.example.com:80", "Some Realm", "foo", "secret"); + +=item $ua->local_address + +=item $ua->local_address( $address ) + +Get/set the local interface to bind to for network connections. The interface +can be specified as a hostname or an IP address. This value is passed as the +C<LocalAddr> argument to L<IO::Socket::INET>. + +=item $ua->max_size + +=item $ua->max_size( $bytes ) + +Get/set the size limit for response content. The default is C<undef>, +which means that there is no limit. If the returned response content +is only partial, because the size limit was exceeded, then a +"Client-Aborted" header will be added to the response. The content +might end up longer than C<max_size> as we abort once appending a +chunk of data makes the length exceed the limit. The "Content-Length" +header, if present, will indicate the length of the full content and +will normally not be the same as C<< length($res->content) >>. + +=item $ua->max_redirect + +=item $ua->max_redirect( $n ) + +This reads or sets the object's limit of how many times it will obey +redirection responses in a given request cycle. + +By default, the value is 7. This means that if you call request() +method and the response is a redirect elsewhere which is in turn a +redirect, and so on seven times, then LWP gives up after that seventh +request. + +=item $ua->parse_head + +=item $ua->parse_head( $boolean ) + +Get/set a value indicating whether we should initialize response +headers from the E<lt>head> section of HTML documents. The default is +TRUE. Do not turn this off, unless you know what you are doing. + +=item $ua->protocols_allowed + +=item $ua->protocols_allowed( \@protocols ) + +This reads (or sets) this user agent's list of protocols that the +request methods will exclusively allow. The protocol names are case +insensitive. + +For example: C<$ua-E<gt>protocols_allowed( [ 'http', 'https'] );> +means that this user agent will I<allow only> those protocols, +and attempts to use this user agent to access URLs with any other +schemes (like "ftp://...") will result in a 500 error. + +To delete the list, call: C<$ua-E<gt>protocols_allowed(undef)> + +By default, an object has neither a C<protocols_allowed> list, nor a +C<protocols_forbidden> list. + +Note that having a C<protocols_allowed> list causes any +C<protocols_forbidden> list to be ignored. + +=item $ua->protocols_forbidden + +=item $ua->protocols_forbidden( \@protocols ) + +This reads (or sets) this user agent's list of protocols that the +request method will I<not> allow. The protocol names are case +insensitive. + +For example: C<$ua-E<gt>protocols_forbidden( [ 'file', 'mailto'] );> +means that this user agent will I<not> allow those protocols, and +attempts to use this user agent to access URLs with those schemes +will result in a 500 error. + +To delete the list, call: C<$ua-E<gt>protocols_forbidden(undef)> + +=item $ua->requests_redirectable + +=item $ua->requests_redirectable( \@requests ) + +This reads or sets the object's list of request names that +C<$ua-E<gt>redirect_ok(...)> will allow redirection for. By +default, this is C<['GET', 'HEAD']>, as per RFC 2616. To +change to include 'POST', consider: + + push @{ $ua->requests_redirectable }, 'POST'; + +=item $ua->show_progress + +=item $ua->show_progress( $boolean ) + +Get/set a value indicating whether a progress bar should be displayed +on the terminal as requests are processed. The default is FALSE. + +=item $ua->timeout + +=item $ua->timeout( $secs ) + +Get/set the timeout value in seconds. The default timeout() value is +180 seconds, i.e. 3 minutes. + +The requests is aborted if no activity on the connection to the server +is observed for C<timeout> seconds. This means that the time it takes +for the complete transaction and the request() method to actually +return might be longer. + +=item $ua->ssl_opts + +=item $ua->ssl_opts( $key ) + +=item $ua->ssl_opts( $key => $value ) + +Get/set the options for SSL connections. Without argument return the list +of options keys currently set. With a single argument return the current +value for the given option. With 2 arguments set the option value and return +the old. Setting an option to the value C<undef> removes this option. + +The options that LWP relates to are: + +=over + +=item C<verify_hostname> => $bool + +When TRUE LWP will for secure protocol schemes ensure it connects to servers +that have a valid certificate matching the expected hostname. If FALSE no +checks are made and you can't be sure that you communicate with the expected peer. +The no checks behaviour was the default for libwww-perl-5.837 and earlier releases. + +This option is initialized from the L<PERL_LWP_SSL_VERIFY_HOSTNAME> environment +variable. If this environment variable isn't set; then C<verify_hostname> +defaults to 1. + +=item C<SSL_ca_file> => $path + +The path to a file containing Certificate Authority certificates. +A default setting for this option is provided by checking the environment +variables C<PERL_LWP_SSL_CA_FILE> and C<HTTPS_CA_FILE> in order. + +=item C<SSL_ca_path> => $path + +The path to a directory containing files containing Certificate Authority +certificates. +A default setting for this option is provided by checking the environment +variables C<PERL_LWP_SSL_CA_PATH> and C<HTTPS_CA_DIR> in order. + +=back + +Other options can be set and are processed directly by the SSL Socket implementation +in use. See L<IO::Socket::SSL> or L<Net::SSL> for details. + +The libwww-perl core no longer bundles protocol plugins for SSL. You will need +to install L<LWP::Protocol::https> separately to enable support for processing +https-URLs. + +=back + +=head2 Proxy attributes + +The following methods set up when requests should be passed via a +proxy server. + +=over + +=item $ua->proxy(\@schemes, $proxy_url) + +=item $ua->proxy($scheme, $proxy_url) + +Set/retrieve proxy URL for a scheme: + + $ua->proxy(['http', 'ftp'], 'http://proxy.sn.no:8001/'); + $ua->proxy('gopher', 'http://proxy.sn.no:8001/'); + +The first form specifies that the URL is to be used for proxying of +access methods listed in the list in the first method argument, +i.e. 'http' and 'ftp'. + +The second form shows a shorthand form for specifying +proxy URL for a single access scheme. + +=item $ua->no_proxy( $domain, ... ) + +Do not proxy requests to the given domains. Calling no_proxy without +any domains clears the list of domains. Eg: + + $ua->no_proxy('localhost', 'example.com'); + +=item $ua->env_proxy + +Load proxy settings from *_proxy environment variables. You might +specify proxies like this (sh-syntax): + + gopher_proxy=http://proxy.my.place/ + wais_proxy=http://proxy.my.place/ + no_proxy="localhost,example.com" + export gopher_proxy wais_proxy no_proxy + +csh or tcsh users should use the C<setenv> command to define these +environment variables. + +On systems with case insensitive environment variables there exists a +name clash between the CGI environment variables and the C<HTTP_PROXY> +environment variable normally picked up by env_proxy(). Because of +this C<HTTP_PROXY> is not honored for CGI scripts. The +C<CGI_HTTP_PROXY> environment variable can be used instead. + +=back + +=head2 Handlers + +Handlers are code that injected at various phases during the +processing of requests. The following methods are provided to manage +the active handlers: + +=over + +=item $ua->add_handler( $phase => \&cb, %matchspec ) + +Add handler to be invoked in the given processing phase. For how to +specify %matchspec see L<HTTP::Config/"Matching">. + +The possible values $phase and the corresponding callback signatures are: + +=over + +=item request_preprepare => sub { my($request, $ua, $h) = @_; ... } + +The handler is called before the C<request_prepare> and other standard +initialization of the request. This can be used to set up headers +and attributes that the C<request_prepare> handler depends on. Proxy +initialization should take place here; but in general don't register +handlers for this phase. + +=item request_prepare => sub { my($request, $ua, $h) = @_; ... } + +The handler is called before the request is sent and can modify the +request any way it see fit. This can for instance be used to add +certain headers to specific requests. + +The method can assign a new request object to $_[0] to replace the +request that is sent fully. + +The return value from the callback is ignored. If an exception is +raised it will abort the request and make the request method return a +"400 Bad request" response. + +=item request_send => sub { my($request, $ua, $h) = @_; ... } + +This handler gets a chance of handling requests before they're sent to the +protocol handlers. It should return an HTTP::Response object if it +wishes to terminate the processing; otherwise it should return nothing. + +The C<response_header> and C<response_data> handlers will not be +invoked for this response, but the C<response_done> will be. + +=item response_header => sub { my($response, $ua, $h) = @_; ... } + +This handler is called right after the response headers have been +received, but before any content data. The handler might set up +handlers for data and might croak to abort the request. + +The handler might set the $response->{default_add_content} value to +control if any received data should be added to the response object +directly. This will initially be false if the $ua->request() method +was called with a $content_file or $content_cb argument; otherwise true. + +=item response_data => sub { my($response, $ua, $h, $data) = @_; ... } + +This handler is called for each chunk of data received for the +response. The handler might croak to abort the request. + +This handler needs to return a TRUE value to be called again for +subsequent chunks for the same request. + +=item response_done => sub { my($response, $ua, $h) = @_; ... } + +The handler is called after the response has been fully received, but +before any redirect handling is attempted. The handler can be used to +extract information or modify the response. + +=item response_redirect => sub { my($response, $ua, $h) = @_; ... } + +The handler is called in $ua->request after C<response_done>. If the +handler returns an HTTP::Request object we'll start over with processing +this request instead. + +=back + +=item $ua->remove_handler( undef, %matchspec ) + +=item $ua->remove_handler( $phase, %matchspec ) + +Remove handlers that match the given %matchspec. If $phase is not +provided remove handlers from all phases. + +Be careful as calling this function with %matchspec that is not +specific enough can remove handlers not owned by you. It's probably +better to use the set_my_handler() method instead. + +The removed handlers are returned. + +=item $ua->set_my_handler( $phase, $cb, %matchspec ) + +Set handlers private to the executing subroutine. Works by defaulting +an C<owner> field to the %matchspec that holds the name of the called +subroutine. You might pass an explicit C<owner> to override this. + +If $cb is passed as C<undef>, remove the handler. + +=item $ua->get_my_handler( $phase, %matchspec ) + +=item $ua->get_my_handler( $phase, %matchspec, $init ) + +Will retrieve the matching handler as hash ref. + +If C<$init> is passed as a TRUE value, create and add the +handler if it's not found. If $init is a subroutine reference, then +it's called with the created handler hash as argument. This sub might +populate the hash with extra fields; especially the callback. If +$init is a hash reference, merge the hashes. + +=item $ua->handlers( $phase, $request ) + +=item $ua->handlers( $phase, $response ) + +Returns the handlers that apply to the given request or response at +the given processing phase. + +=back + +=head1 REQUEST METHODS + +The methods described in this section are used to dispatch requests +via the user agent. The following request methods are provided: + +=over + +=item $ua->get( $url ) + +=item $ua->get( $url , $field_name => $value, ... ) + +This method will dispatch a C<GET> request on the given $url. Further +arguments can be given to initialize the headers of the request. These +are given as separate name/value pairs. The return value is a +response object. See L<HTTP::Response> for a description of the +interface it provides. + +There will still be a response object returned when LWP can't connect to the +server specified in the URL or when other failures in protocol handlers occur. +These internal responses use the standard HTTP status codes, so the responses +can't be differentiated by testing the response status code alone. Error +responses that LWP generates internally will have the "Client-Warning" header +set to the value "Internal response". If you need to differentiate these +internal responses from responses that a remote server actually generates, you +need to test this header value. + +Fields names that start with ":" are special. These will not +initialize headers of the request but will determine how the response +content is treated. The following special field names are recognized: + + :content_file => $filename + :content_cb => \&callback + :read_size_hint => $bytes + +If a $filename is provided with the C<:content_file> option, then the +response content will be saved here instead of in the response +object. If a callback is provided with the C<:content_cb> option then +this function will be called for each chunk of the response content as +it is received from the server. If neither of these options are +given, then the response content will accumulate in the response +object itself. This might not be suitable for very large response +bodies. Only one of C<:content_file> or C<:content_cb> can be +specified. The content of unsuccessful responses will always +accumulate in the response object itself, regardless of the +C<:content_file> or C<:content_cb> options passed in. + +The C<:read_size_hint> option is passed to the protocol module which +will try to read data from the server in chunks of this size. A +smaller value for the C<:read_size_hint> will result in a higher +number of callback invocations. + +The callback function is called with 3 arguments: a chunk of data, a +reference to the response object, and a reference to the protocol +object. The callback can abort the request by invoking die(). The +exception message will show up as the "X-Died" header field in the +response returned by the get() function. + +=item $ua->head( $url ) + +=item $ua->head( $url , $field_name => $value, ... ) + +This method will dispatch a C<HEAD> request on the given $url. +Otherwise it works like the get() method described above. + +=item $ua->post( $url, \%form ) + +=item $ua->post( $url, \@form ) + +=item $ua->post( $url, \%form, $field_name => $value, ... ) + +=item $ua->post( $url, $field_name => $value,... Content => \%form ) + +=item $ua->post( $url, $field_name => $value,... Content => \@form ) + +=item $ua->post( $url, $field_name => $value,... Content => $content ) + +This method will dispatch a C<POST> request on the given $url, with +%form or @form providing the key/value pairs for the fill-in form +content. Additional headers and content options are the same as for +the get() method. + +This method will use the POST() function from C<HTTP::Request::Common> +to build the request. See L<HTTP::Request::Common> for a details on +how to pass form content and other advanced features. + +=item $ua->put( $url, \%form ) + +=item $ua->put( $url, \@form ) + +=item $ua->put( $url, \%form, $field_name => $value, ... ) + +=item $ua->put( $url, $field_name => $value,... Content => \%form ) + +=item $ua->put( $url, $field_name => $value,... Content => \@form ) + +=item $ua->put( $url, $field_name => $value,... Content => $content ) + +This method will dispatch a C<PUT> request on the given $url, with +%form or @form providing the key/value pairs for the fill-in form +content. Additional headers and content options are the same as for +the get() method. + +This method will use the PUT() function from C<HTTP::Request::Common> +to build the request. See L<HTTP::Request::Common> for a details on +how to pass form content and other advanced features. + +=item $ua->delete( $url ) + +=item $ua->delete( $url, $field_name => $value, ... ) + +This method will dispatch a C<DELETE> request on the given $url. Additional +headers and content options are the same as for the get() method. + +This method will use the DELETE() function from C<HTTP::Request::Common> +to build the request. See L<HTTP::Request::Common> for a details on +how to pass form content and other advanced features. + +=item $ua->mirror( $url, $filename ) + +This method will get the document identified by $url and store it in +file called $filename. If the file already exists, then the request +will contain an "If-Modified-Since" header matching the modification +time of the file. If the document on the server has not changed since +this time, then nothing happens. If the document has been updated, it +will be downloaded again. The modification time of the file will be +forced to match that of the server. + +The return value is the response object. + +=item $ua->request( $request ) + +=item $ua->request( $request, $content_file ) + +=item $ua->request( $request, $content_cb ) + +=item $ua->request( $request, $content_cb, $read_size_hint ) + +This method will dispatch the given $request object. Normally this +will be an instance of the C<HTTP::Request> class, but any object with +a similar interface will do. The return value is a response object. +See L<HTTP::Request> and L<HTTP::Response> for a description of the +interface provided by these classes. + +The request() method will process redirects and authentication +responses transparently. This means that it may actually send several +simple requests via the simple_request() method described below. + +The request methods described above; get(), head(), post() and +mirror(), will all dispatch the request they build via this method. +They are convenience methods that simply hides the creation of the +request object for you. + +The $content_file, $content_cb and $read_size_hint all correspond to +options described with the get() method above. + +You are allowed to use a CODE reference as C<content> in the request +object passed in. The C<content> function should return the content +when called. The content can be returned in chunks. The content +function will be invoked repeatedly until it return an empty string to +signal that there is no more content. + +=item $ua->simple_request( $request ) + +=item $ua->simple_request( $request, $content_file ) + +=item $ua->simple_request( $request, $content_cb ) + +=item $ua->simple_request( $request, $content_cb, $read_size_hint ) + +This method dispatches a single request and returns the response +received. Arguments are the same as for request() described above. + +The difference from request() is that simple_request() will not try to +handle redirects or authentication responses. The request() method +will in fact invoke this method for each simple request it sends. + +=item $ua->is_online + +Tries to determine if you have access to the Internet. Returns +TRUE if the built-in heuristics determine that the user agent is +able to access the Internet (over HTTP). See also L<LWP::Online>. + +=item $ua->is_protocol_supported( $scheme ) + +You can use this method to test whether this user agent object supports the +specified C<scheme>. (The C<scheme> might be a string (like 'http' or +'ftp') or it might be an URI object reference.) + +Whether a scheme is supported, is determined by the user agent's +C<protocols_allowed> or C<protocols_forbidden> lists (if any), and by +the capabilities of LWP. I.e., this will return TRUE only if LWP +supports this protocol I<and> it's permitted for this particular +object. + +=back + +=head2 Callback methods + +The following methods will be invoked as requests are processed. These +methods are documented here because subclasses of C<LWP::UserAgent> +might want to override their behaviour. + +=over + +=item $ua->prepare_request( $request ) + +This method is invoked by simple_request(). Its task is to modify the +given $request object by setting up various headers based on the +attributes of the user agent. The return value should normally be the +$request object passed in. If a different request object is returned +it will be the one actually processed. + +The headers affected by the base implementation are; "User-Agent", +"From", "Range" and "Cookie". + +=item $ua->redirect_ok( $prospective_request, $response ) + +This method is called by request() before it tries to follow a +redirection to the request in $response. This should return a TRUE +value if this redirection is permissible. The $prospective_request +will be the request to be sent if this method returns TRUE. + +The base implementation will return FALSE unless the method +is in the object's C<requests_redirectable> list, +FALSE if the proposed redirection is to a "file://..." +URL, and TRUE otherwise. + +=item $ua->get_basic_credentials( $realm, $uri, $isproxy ) + +This is called by request() to retrieve credentials for documents +protected by Basic or Digest Authentication. The arguments passed in +is the $realm provided by the server, the $uri requested and a boolean +flag to indicate if this is authentication against a proxy server. + +The method should return a username and password. It should return an +empty list to abort the authentication resolution attempt. Subclasses +can override this method to prompt the user for the information. An +example of this can be found in C<lwp-request> program distributed +with this library. + +The base implementation simply checks a set of pre-stored member +variables, set up with the credentials() method. + +=item $ua->progress( $status, $request_or_response ) + +This is called frequently as the response is received regardless of +how the content is processed. The method is called with $status +"begin" at the start of processing the request and with $state "end" +before the request method returns. In between these $status will be +the fraction of the response currently received or the string "tick" +if the fraction can't be calculated. + +When $status is "begin" the second argument is the request object, +otherwise it is the response object. + +=back + +=head1 SEE ALSO + +See L<LWP> for a complete overview of libwww-perl5. See L<lwpcook> +and the scripts F<lwp-request> and F<lwp-download> for examples of +usage. + +See L<HTTP::Request> and L<HTTP::Response> for a description of the +message objects dispatched and received. See L<HTTP::Request::Common> +and L<HTML::Form> for other ways to build request objects. + +See L<WWW::Mechanize> and L<WWW::Search> for examples of more +specialized user agents based on C<LWP::UserAgent>. + +=head1 COPYRIGHT + +Copyright 1995-2009 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. diff --git a/lwpcook.pod b/lwpcook.pod new file mode 100644 index 0000000..2002b79 --- /dev/null +++ b/lwpcook.pod @@ -0,0 +1,311 @@ +=head1 NAME + +lwpcook - The libwww-perl cookbook + +=head1 DESCRIPTION + +This document contain some examples that show typical usage of the +libwww-perl library. You should consult the documentation for the +individual modules for more detail. + +All examples should be runnable programs. You can, in most cases, test +the code sections by piping the program text directly to perl. + + + +=head1 GET + +It is very easy to use this library to just fetch documents from the +net. The LWP::Simple module provides the get() function that return +the document specified by its URL argument: + + use LWP::Simple; + $doc = get 'http://search.cpan.org/dist/libwww-perl/'; + +or, as a perl one-liner using the getprint() function: + + perl -MLWP::Simple -e 'getprint "http://search.cpan.org/dist/libwww-perl/"' + +or, how about fetching the latest perl by running this command: + + perl -MLWP::Simple -e ' + getstore "ftp://ftp.sunet.se/pub/lang/perl/CPAN/src/latest.tar.gz", + "perl.tar.gz"' + +You will probably first want to find a CPAN site closer to you by +running something like the following command: + + perl -MLWP::Simple -e 'getprint "http://www.cpan.org/SITES.html"' + +Enough of this simple stuff! The LWP object oriented interface gives +you more control over the request sent to the server. Using this +interface you have full control over headers sent and how you want to +handle the response returned. + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + $ua->agent("$0/0.1 " . $ua->agent); + # $ua->agent("Mozilla/8.0") # pretend we are very capable browser + + $req = HTTP::Request->new( + GET => 'http://search.cpan.org/dist/libwww-perl/'); + $req->header('Accept' => 'text/html'); + + # send request + $res = $ua->request($req); + + # check the outcome + if ($res->is_success) { + print $res->decoded_content; + } + else { + print "Error: " . $res->status_line . "\n"; + } + +The lwp-request program (alias GET) that is distributed with the +library can also be used to fetch documents from WWW servers. + + + +=head1 HEAD + +If you just want to check if a document is present (i.e. the URL is +valid) try to run code that looks like this: + + use LWP::Simple; + + if (head($url)) { + # ok document exists + } + +The head() function really returns a list of meta-information about +the document. The first three values of the list returned are the +document type, the size of the document, and the age of the document. + +More control over the request or access to all header values returned +require that you use the object oriented interface described for GET +above. Just s/GET/HEAD/g. + + +=head1 POST + +There is no simple procedural interface for posting data to a WWW server. You +must use the object oriented interface for this. The most common POST +operation is to access a WWW form application: + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + + my $req = HTTP::Request->new( + POST => 'http://rt.cpan.org/Public/Dist/Display.html'); + $req->content_type('application/x-www-form-urlencoded'); + $req->content('Status=Active&Name=libwww-perl'); + + my $res = $ua->request($req); + print $res->as_string; + +Lazy people use the HTTP::Request::Common module to set up a suitable +POST request message (it handles all the escaping issues) and has a +suitable default for the content_type: + + use HTTP::Request::Common qw(POST); + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + + my $req = POST 'http://rt.cpan.org/Public/Dist/Display.html', + [ Status => 'Active', Name => 'libwww-perl' ]; + + print $ua->request($req)->as_string; + +The lwp-request program (alias POST) that is distributed with the +library can also be used for posting data. + + + +=head1 PROXIES + +Some sites use proxies to go through fire wall machines, or just as +cache in order to improve performance. Proxies can also be used for +accessing resources through protocols not supported directly (or +supported badly :-) by the libwww-perl library. + +You should initialize your proxy setting before you start sending +requests: + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + $ua->env_proxy; # initialize from environment variables + # or + $ua->proxy(ftp => 'http://proxy.myorg.com'); + $ua->proxy(wais => 'http://proxy.myorg.com'); + $ua->no_proxy(qw(no se fi)); + + my $req = HTTP::Request->new(GET => 'wais://xxx.com/'); + print $ua->request($req)->as_string; + +The LWP::Simple interface will call env_proxy() for you automatically. +Applications that use the $ua->env_proxy() method will normally not +use the $ua->proxy() and $ua->no_proxy() methods. + +Some proxies also require that you send it a username/password in +order to let requests through. You should be able to add the +required header, with something like this: + + use LWP::UserAgent; + + $ua = LWP::UserAgent->new; + $ua->proxy(['http', 'ftp'] => 'http://username:password@proxy.myorg.com'); + + $req = HTTP::Request->new('GET',"http://www.perl.com"); + + $res = $ua->request($req); + print $res->decoded_content if $res->is_success; + +Replace C<proxy.myorg.com>, C<username> and +C<password> with something suitable for your site. + + +=head1 ACCESS TO PROTECTED DOCUMENTS + +Documents protected by basic authorization can easily be accessed +like this: + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + $req = HTTP::Request->new(GET => 'http://www.linpro.no/secret/'); + $req->authorization_basic('aas', 'mypassword'); + print $ua->request($req)->as_string; + +The other alternative is to provide a subclass of I<LWP::UserAgent> that +overrides the get_basic_credentials() method. Study the I<lwp-request> +program for an example of this. + + +=head1 COOKIES + +Some sites like to play games with cookies. By default LWP ignores +cookies provided by the servers it visits. LWP will collect cookies +and respond to cookie requests if you set up a cookie jar. + + use LWP::UserAgent; + use HTTP::Cookies; + + $ua = LWP::UserAgent->new; + $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", + autosave => 1)); + + # and then send requests just as you used to do + $res = $ua->request(HTTP::Request->new(GET => "http://no.yahoo.com/")); + print $res->status_line, "\n"; + +As you visit sites that send you cookies to keep, then the file +F<lwpcookies.txt"> will grow. + +=head1 HTTPS + +URLs with https scheme are accessed in exactly the same way as with +http scheme, provided that an SSL interface module for LWP has been +properly installed (see the F<README.SSL> file found in the +libwww-perl distribution for more details). If no SSL interface is +installed for LWP to use, then you will get "501 Protocol scheme +'https' is not supported" errors when accessing such URLs. + +Here's an example of fetching and printing a WWW page using SSL: + + use LWP::UserAgent; + + my $ua = LWP::UserAgent->new; + my $req = HTTP::Request->new(GET => 'https://www.helsinki.fi/'); + my $res = $ua->request($req); + if ($res->is_success) { + print $res->as_string; + } + else { + print "Failed: ", $res->status_line, "\n"; + } + +=head1 MIRRORING + +If you want to mirror documents from a WWW server, then try to run +code similar to this at regular intervals: + + use LWP::Simple; + + %mirrors = ( + 'http://www.sn.no/' => 'sn.html', + 'http://www.perl.com/' => 'perl.html', + 'http://search.cpan.org/distlibwww-perl/' => 'lwp.html', + 'gopher://gopher.sn.no/' => 'gopher.html', + ); + + while (($url, $localfile) = each(%mirrors)) { + mirror($url, $localfile); + } + +Or, as a perl one-liner: + + perl -MLWP::Simple -e 'mirror("http://www.perl.com/", "perl.html")'; + +The document will not be transferred unless it has been updated. + + + +=head1 LARGE DOCUMENTS + +If the document you want to fetch is too large to be kept in memory, +then you have two alternatives. You can instruct the library to write +the document content to a file (second $ua->request() argument is a file +name): + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + + my $req = HTTP::Request->new(GET => + 'http://www.cpan.org/authors/Gisle_Aas/libwww-perl-6.02.tar.gz'); + $res = $ua->request($req, "libwww-perl.tar.gz"); + if ($res->is_success) { + print "ok\n"; + } + else { + print $res->status_line, "\n"; + } + + +Or you can process the document as it arrives (second $ua->request() +argument is a code reference): + + use LWP::UserAgent; + $ua = LWP::UserAgent->new; + $URL = 'ftp://ftp.unit.no/pub/rfc/rfc-index.txt'; + + my $expected_length; + my $bytes_received = 0; + my $res = + $ua->request(HTTP::Request->new(GET => $URL), + sub { + my($chunk, $res) = @_; + $bytes_received += length($chunk); + unless (defined $expected_length) { + $expected_length = $res->content_length || 0; + } + if ($expected_length) { + printf STDERR "%d%% - ", + 100 * $bytes_received / $expected_length; + } + print STDERR "$bytes_received bytes received\n"; + + # XXX Should really do something with the chunk itself + # print $chunk; + }); + print $res->status_line, "\n"; + + + +=head1 COPYRIGHT + +Copyright 1996-2001, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + + diff --git a/lwptut.pod b/lwptut.pod new file mode 100644 index 0000000..3ab5e61 --- /dev/null +++ b/lwptut.pod @@ -0,0 +1,839 @@ +=head1 NAME + +lwptut -- An LWP Tutorial + +=head1 DESCRIPTION + +LWP (short for "Library for WWW in Perl") is a very popular group of +Perl modules for accessing data on the Web. Like most Perl +module-distributions, each of LWP's component modules comes with +documentation that is a complete reference to its interface. However, +there are so many modules in LWP that it's hard to know where to start +looking for information on how to do even the simplest most common +things. + +Really introducing you to using LWP would require a whole book -- a book +that just happens to exist, called I<Perl & LWP>. But this article +should give you a taste of how you can go about some common tasks with +LWP. + + +=head2 Getting documents with LWP::Simple + +If you just want to get what's at a particular URL, the simplest way +to do it is LWP::Simple's functions. + +In a Perl program, you can call its C<get($url)> function. It will try +getting that URL's content. If it works, then it'll return the +content; but if there's some error, it'll return undef. + + my $url = 'http://www.npr.org/programs/fa/?todayDate=current'; + # Just an example: the URL for the most recent /Fresh Air/ show + + use LWP::Simple; + my $content = get $url; + die "Couldn't get $url" unless defined $content; + + # Then go do things with $content, like this: + + if($content =~ m/jazz/i) { + print "They're talking about jazz today on Fresh Air!\n"; + } + else { + print "Fresh Air is apparently jazzless today.\n"; + } + +The handiest variant on C<get> is C<getprint>, which is useful in Perl +one-liners. If it can get the page whose URL you provide, it sends it +to STDOUT; otherwise it complains to STDERR. + + % perl -MLWP::Simple -e "getprint 'http://www.cpan.org/RECENT'" + +That is the URL of a plain text file that lists new files in CPAN in +the past two weeks. You can easily make it part of a tidy little +shell command, like this one that mails you the list of new +C<Acme::> modules: + + % perl -MLWP::Simple -e "getprint 'http://www.cpan.org/RECENT'" \ + | grep "/by-module/Acme" | mail -s "New Acme modules! Joy!" $USER + +There are other useful functions in LWP::Simple, including one function +for running a HEAD request on a URL (useful for checking links, or +getting the last-revised time of a URL), and two functions for +saving/mirroring a URL to a local file. See L<the LWP::Simple +documentation|LWP::Simple> for the full details, or chapter 2 of I<Perl +& LWP> for more examples. + + + +=for comment + ########################################################################## + + + +=head2 The Basics of the LWP Class Model + +LWP::Simple's functions are handy for simple cases, but its functions +don't support cookies or authorization, don't support setting header +lines in the HTTP request, generally don't support reading header lines +in the HTTP response (notably the full HTTP error message, in case of an +error). To get at all those features, you'll have to use the full LWP +class model. + +While LWP consists of dozens of classes, the main two that you have to +understand are L<LWP::UserAgent> and L<HTTP::Response>. LWP::UserAgent +is a class for "virtual browsers" which you use for performing requests, +and L<HTTP::Response> is a class for the responses (or error messages) +that you get back from those requests. + +The basic idiom is C<< $response = $browser->get($url) >>, or more fully +illustrated: + + # Early in your program: + + use LWP 5.64; # Loads all important LWP classes, and makes + # sure your version is reasonably recent. + + my $browser = LWP::UserAgent->new; + + ... + + # Then later, whenever you need to make a get request: + my $url = 'http://www.npr.org/programs/fa/?todayDate=current'; + + my $response = $browser->get( $url ); + die "Can't get $url -- ", $response->status_line + unless $response->is_success; + + die "Hey, I was expecting HTML, not ", $response->content_type + unless $response->content_type eq 'text/html'; + # or whatever content-type you're equipped to deal with + + # Otherwise, process the content somehow: + + if($response->decoded_content =~ m/jazz/i) { + print "They're talking about jazz today on Fresh Air!\n"; + } + else { + print "Fresh Air is apparently jazzless today.\n"; + } + +There are two objects involved: C<$browser>, which holds an object of +class LWP::UserAgent, and then the C<$response> object, which is of +class HTTP::Response. You really need only one browser object per +program; but every time you make a request, you get back a new +HTTP::Response object, which will have some interesting attributes: + +=over + +=item * + +A status code indicating +success or failure +(which you can test with C<< $response->is_success >>). + +=item * + +An HTTP status +line that is hopefully informative if there's failure (which you can +see with C<< $response->status_line >>, +returning something like "404 Not Found"). + +=item * + +A MIME content-type like "text/html", "image/gif", +"application/xml", etc., which you can see with +C<< $response->content_type >> + +=item * + +The actual content of the response, in C<< $response->decoded_content >>. +If the response is HTML, that's where the HTML source will be; if +it's a GIF, then C<< $response->decoded_content >> will be the binary +GIF data. + +=item * + +And dozens of other convenient and more specific methods that are +documented in the docs for L<HTTP::Response>, and its superclasses +L<HTTP::Message> and L<HTTP::Headers>. + +=back + + + +=for comment + ########################################################################## + + + +=head2 Adding Other HTTP Request Headers + +The most commonly used syntax for requests is C<< $response = +$browser->get($url) >>, but in truth, you can add extra HTTP header +lines to the request by adding a list of key-value pairs after the URL, +like so: + + $response = $browser->get( $url, $key1, $value1, $key2, $value2, ... ); + +For example, here's how to send some more Netscape-like headers, in case +you're dealing with a site that would otherwise reject your request: + + + my @ns_headers = ( + 'User-Agent' => 'Mozilla/4.76 [en] (Win98; U)', + 'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*', + 'Accept-Charset' => 'iso-8859-1,*,utf-8', + 'Accept-Language' => 'en-US', + ); + + ... + + $response = $browser->get($url, @ns_headers); + +If you weren't reusing that array, you could just go ahead and do this: + + $response = $browser->get($url, + 'User-Agent' => 'Mozilla/4.76 [en] (Win98; U)', + 'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*', + 'Accept-Charset' => 'iso-8859-1,*,utf-8', + 'Accept-Language' => 'en-US', + ); + +If you were only ever changing the 'User-Agent' line, you could just change +the C<$browser> object's default line from "libwww-perl/5.65" (or the like) +to whatever you like, using the LWP::UserAgent C<agent> method: + + $browser->agent('Mozilla/4.76 [en] (Win98; U)'); + + + +=for comment + ########################################################################## + + + +=head2 Enabling Cookies + +A default LWP::UserAgent object acts like a browser with its cookies +support turned off. There are various ways of turning it on, by setting +its C<cookie_jar> attribute. A "cookie jar" is an object representing +a little database of all +the HTTP cookies that a browser can know about. It can correspond to a +file on disk (the way Netscape uses its F<cookies.txt> file), or it can +be just an in-memory object that starts out empty, and whose collection of +cookies will disappear once the program is finished running. + +To give a browser an in-memory empty cookie jar, you set its C<cookie_jar> +attribute like so: + + $browser->cookie_jar({}); + +To give it a copy that will be read from a file on disk, and will be saved +to it when the program is finished running, set the C<cookie_jar> attribute +like this: + + use HTTP::Cookies; + $browser->cookie_jar( HTTP::Cookies->new( + 'file' => '/some/where/cookies.lwp', + # where to read/write cookies + 'autosave' => 1, + # save it to disk when done + )); + +That file will be in LWP-specific format. If you want to access the +cookies in your Netscape cookies file, you can use the +HTTP::Cookies::Netscape class: + + use HTTP::Cookies; + # yes, loads HTTP::Cookies::Netscape too + + $browser->cookie_jar( HTTP::Cookies::Netscape->new( + 'file' => 'c:/Program Files/Netscape/Users/DIR-NAME-HERE/cookies.txt', + # where to read cookies + )); + +You could add an C<< 'autosave' => 1 >> line as further above, but at +time of writing, it's uncertain whether Netscape might discard some of +the cookies you could be writing back to disk. + + + +=for comment + ########################################################################## + + + +=head2 Posting Form Data + +Many HTML forms send data to their server using an HTTP POST request, which +you can send with this syntax: + + $response = $browser->post( $url, + [ + formkey1 => value1, + formkey2 => value2, + ... + ], + ); + +Or if you need to send HTTP headers: + + $response = $browser->post( $url, + [ + formkey1 => value1, + formkey2 => value2, + ... + ], + headerkey1 => value1, + headerkey2 => value2, + ); + +For example, the following program makes a search request to AltaVista +(by sending some form data via an HTTP POST request), and extracts from +the HTML the report of the number of matches: + + use strict; + use warnings; + use LWP 5.64; + my $browser = LWP::UserAgent->new; + + my $word = 'tarragon'; + + my $url = 'http://search.yahoo.com/yhs/search'; + my $response = $browser->post( $url, + [ 'q' => $word, # the Altavista query string + 'fr' => 'altavista', 'pg' => 'q', 'avkw' => 'tgz', 'kl' => 'XX', + ] + ); + die "$url error: ", $response->status_line + unless $response->is_success; + die "Weird content type at $url -- ", $response->content_type + unless $response->content_is_html; + + if( $response->decoded_content =~ m{([0-9,]+)(?:<.*?>)? results for} ) { + # The substring will be like "996,000</strong> results for" + print "$word: $1\n"; + } + else { + print "Couldn't find the match-string in the response\n"; + } + + + +=for comment + ########################################################################## + + + +=head2 Sending GET Form Data + +Some HTML forms convey their form data not by sending the data +in an HTTP POST request, but by making a normal GET request with +the data stuck on the end of the URL. For example, if you went to +C<www.imdb.com> and ran a search on "Blade Runner", the URL you'd see +in your browser window would be: + + http://www.imdb.com/find?s=all&q=Blade+Runner + +To run the same search with LWP, you'd use this idiom, which involves +the URI class: + + use URI; + my $url = URI->new( 'http://www.imdb.com/find' ); + # makes an object representing the URL + + $url->query_form( # And here the form data pairs: + 'q' => 'Blade Runner', + 's' => 'all', + ); + + my $response = $browser->get($url); + +See chapter 5 of I<Perl & LWP> for a longer discussion of HTML forms +and of form data, and chapters 6 through 9 for a longer discussion of +extracting data from HTML. + + + +=head2 Absolutizing URLs + +The URI class that we just mentioned above provides all sorts of methods +for accessing and modifying parts of URLs (such as asking sort of URL it +is with C<< $url->scheme >>, and asking what host it refers to with C<< +$url->host >>, and so on, as described in L<the docs for the URI +class|URI>. However, the methods of most immediate interest +are the C<query_form> method seen above, and now the C<new_abs> method +for taking a probably-relative URL string (like "../foo.html") and getting +back an absolute URL (like "http://www.perl.com/stuff/foo.html"), as +shown here: + + use URI; + $abs = URI->new_abs($maybe_relative, $base); + +For example, consider this program that matches URLs in the HTML +list of new modules in CPAN: + + use strict; + use warnings; + use LWP; + my $browser = LWP::UserAgent->new; + + my $url = 'http://www.cpan.org/RECENT.html'; + my $response = $browser->get($url); + die "Can't get $url -- ", $response->status_line + unless $response->is_success; + + my $html = $response->decoded_content; + while( $html =~ m/<A HREF=\"(.*?)\"/g ) { + print "$1\n"; + } + +When run, it emits output that starts out something like this: + + MIRRORING.FROM + RECENT + RECENT.html + authors/00whois.html + authors/01mailrc.txt.gz + authors/id/A/AA/AASSAD/CHECKSUMS + ... + +However, if you actually want to have those be absolute URLs, you +can use the URI module's C<new_abs> method, by changing the C<while> +loop to this: + + while( $html =~ m/<A HREF=\"(.*?)\"/g ) { + print URI->new_abs( $1, $response->base ) ,"\n"; + } + +(The C<< $response->base >> method from L<HTTP::Message|HTTP::Message> +is for returning what URL +should be used for resolving relative URLs -- it's usually just +the same as the URL that you requested.) + +That program then emits nicely absolute URLs: + + http://www.cpan.org/MIRRORING.FROM + http://www.cpan.org/RECENT + http://www.cpan.org/RECENT.html + http://www.cpan.org/authors/00whois.html + http://www.cpan.org/authors/01mailrc.txt.gz + http://www.cpan.org/authors/id/A/AA/AASSAD/CHECKSUMS + ... + +See chapter 4 of I<Perl & LWP> for a longer discussion of URI objects. + +Of course, using a regexp to match hrefs is a bit simplistic, and for +more robust programs, you'll probably want to use an HTML-parsing module +like L<HTML::LinkExtor> or L<HTML::TokeParser> or even maybe +L<HTML::TreeBuilder>. + + + + +=for comment + ########################################################################## + +=head2 Other Browser Attributes + +LWP::UserAgent objects have many attributes for controlling how they +work. Here are a few notable ones: + +=over + +=item * + +C<< $browser->timeout(15); >> + +This sets this browser object to give up on requests that don't answer +within 15 seconds. + + +=item * + +C<< $browser->protocols_allowed( [ 'http', 'gopher'] ); >> + +This sets this browser object to not speak any protocols other than HTTP +and gopher. If it tries accessing any other kind of URL (like an "ftp:" +or "mailto:" or "news:" URL), then it won't actually try connecting, but +instead will immediately return an error code 500, with a message like +"Access to 'ftp' URIs has been disabled". + + +=item * + +C<< use LWP::ConnCache; $browser->conn_cache(LWP::ConnCache->new()); >> + +This tells the browser object to try using the HTTP/1.1 "Keep-Alive" +feature, which speeds up requests by reusing the same socket connection +for multiple requests to the same server. + + +=item * + +C<< $browser->agent( 'SomeName/1.23 (more info here maybe)' ) >> + +This changes how the browser object will identify itself in +the default "User-Agent" line is its HTTP requests. By default, +it'll send "libwww-perl/I<versionnumber>", like +"libwww-perl/5.65". You can change that to something more descriptive +like this: + + $browser->agent( 'SomeName/3.14 (contact@robotplexus.int)' ); + +Or if need be, you can go in disguise, like this: + + $browser->agent( 'Mozilla/4.0 (compatible; MSIE 5.12; Mac_PowerPC)' ); + + +=item * + +C<< push @{ $ua->requests_redirectable }, 'POST'; >> + +This tells this browser to obey redirection responses to POST requests +(like most modern interactive browsers), even though the HTTP RFC says +that should not normally be done. + + +=back + + +For more options and information, see L<the full documentation for +LWP::UserAgent|LWP::UserAgent>. + + + +=for comment + ########################################################################## + + + +=head2 Writing Polite Robots + +If you want to make sure that your LWP-based program respects F<robots.txt> +files and doesn't make too many requests too fast, you can use the LWP::RobotUA +class instead of the LWP::UserAgent class. + +LWP::RobotUA class is just like LWP::UserAgent, and you can use it like so: + + use LWP::RobotUA; + my $browser = LWP::RobotUA->new('YourSuperBot/1.34', 'you@yoursite.com'); + # Your bot's name and your email address + + my $response = $browser->get($url); + +But HTTP::RobotUA adds these features: + + +=over + +=item * + +If the F<robots.txt> on C<$url>'s server forbids you from accessing +C<$url>, then the C<$browser> object (assuming it's of class LWP::RobotUA) +won't actually request it, but instead will give you back (in C<$response>) a 403 error +with a message "Forbidden by robots.txt". That is, if you have this line: + + die "$url -- ", $response->status_line, "\nAborted" + unless $response->is_success; + +then the program would die with an error message like this: + + http://whatever.site.int/pith/x.html -- 403 Forbidden by robots.txt + Aborted at whateverprogram.pl line 1234 + +=item * + +If this C<$browser> object sees that the last time it talked to +C<$url>'s server was too recently, then it will pause (via C<sleep>) to +avoid making too many requests too often. How long it will pause for, is +by default one minute -- but you can control it with the C<< +$browser->delay( I<minutes> ) >> attribute. + +For example, this code: + + $browser->delay( 7/60 ); + +...means that this browser will pause when it needs to avoid talking to +any given server more than once every 7 seconds. + +=back + +For more options and information, see L<the full documentation for +LWP::RobotUA|LWP::RobotUA>. + + + + + +=for comment + ########################################################################## + +=head2 Using Proxies + +In some cases, you will want to (or will have to) use proxies for +accessing certain sites and/or using certain protocols. This is most +commonly the case when your LWP program is running (or could be running) +on a machine that is behind a firewall. + +To make a browser object use proxies that are defined in the usual +environment variables (C<HTTP_PROXY>, etc.), just call the C<env_proxy> +on a user-agent object before you go making any requests on it. +Specifically: + + use LWP::UserAgent; + my $browser = LWP::UserAgent->new; + + # And before you go making any requests: + $browser->env_proxy; + +For more information on proxy parameters, see L<the LWP::UserAgent +documentation|LWP::UserAgent>, specifically the C<proxy>, C<env_proxy>, +and C<no_proxy> methods. + + + +=for comment + ########################################################################## + +=head2 HTTP Authentication + +Many web sites restrict access to documents by using "HTTP +Authentication". This isn't just any form of "enter your password" +restriction, but is a specific mechanism where the HTTP server sends the +browser an HTTP code that says "That document is part of a protected +'realm', and you can access it only if you re-request it and add some +special authorization headers to your request". + +For example, the Unicode.org admins stop email-harvesting bots from +harvesting the contents of their mailing list archives, by protecting +them with HTTP Authentication, and then publicly stating the username +and password (at C<http://www.unicode.org/mail-arch/>) -- namely +username "unicode-ml" and password "unicode". + +For example, consider this URL, which is part of the protected +area of the web site: + + http://www.unicode.org/mail-arch/unicode-ml/y2002-m08/0067.html + +If you access that with a browser, you'll get a prompt +like +"Enter username and password for 'Unicode-MailList-Archives' at server +'www.unicode.org'". + +In LWP, if you just request that URL, like this: + + use LWP; + my $browser = LWP::UserAgent->new; + + my $url = + 'http://www.unicode.org/mail-arch/unicode-ml/y2002-m08/0067.html'; + my $response = $browser->get($url); + + die "Error: ", $response->header('WWW-Authenticate') || 'Error accessing', + # ('WWW-Authenticate' is the realm-name) + "\n ", $response->status_line, "\n at $url\n Aborting" + unless $response->is_success; + +Then you'll get this error: + + Error: Basic realm="Unicode-MailList-Archives" + 401 Authorization Required + at http://www.unicode.org/mail-arch/unicode-ml/y2002-m08/0067.html + Aborting at auth1.pl line 9. [or wherever] + +...because the C<$browser> doesn't know any the username and password +for that realm ("Unicode-MailList-Archives") at that host +("www.unicode.org"). The simplest way to let the browser know about this +is to use the C<credentials> method to let it know about a username and +password that it can try using for that realm at that host. The syntax is: + + $browser->credentials( + 'servername:portnumber', + 'realm-name', + 'username' => 'password' + ); + +In most cases, the port number is 80, the default TCP/IP port for HTTP; and +you usually call the C<credentials> method before you make any requests. +For example: + + $browser->credentials( + 'reports.mybazouki.com:80', + 'web_server_usage_reports', + 'plinky' => 'banjo123' + ); + +So if we add the following to the program above, right after the C<< +$browser = LWP::UserAgent->new; >> line... + + $browser->credentials( # add this to our $browser 's "key ring" + 'www.unicode.org:80', + 'Unicode-MailList-Archives', + 'unicode-ml' => 'unicode' + ); + +...then when we run it, the request succeeds, instead of causing the +C<die> to be called. + + + +=for comment + ########################################################################## + +=head2 Accessing HTTPS URLs + +When you access an HTTPS URL, it'll work for you just like an HTTP URL +would -- if your LWP installation has HTTPS support (via an appropriate +Secure Sockets Layer library). For example: + + use LWP; + my $url = 'https://www.paypal.com/'; # Yes, HTTPS! + my $browser = LWP::UserAgent->new; + my $response = $browser->get($url); + die "Error at $url\n ", $response->status_line, "\n Aborting" + unless $response->is_success; + print "Whee, it worked! I got that ", + $response->content_type, " document!\n"; + +If your LWP installation doesn't have HTTPS support set up, then the +response will be unsuccessful, and you'll get this error message: + + Error at https://www.paypal.com/ + 501 Protocol scheme 'https' is not supported + Aborting at paypal.pl line 7. [or whatever program and line] + +If your LWP installation I<does> have HTTPS support installed, then the +response should be successful, and you should be able to consult +C<$response> just like with any normal HTTP response. + +For information about installing HTTPS support for your LWP +installation, see the helpful F<README.SSL> file that comes in the +libwww-perl distribution. + + +=for comment + ########################################################################## + + + +=head2 Getting Large Documents + +When you're requesting a large (or at least potentially large) document, +a problem with the normal way of using the request methods (like C<< +$response = $browser->get($url) >>) is that the response object in +memory will have to hold the whole document -- I<in memory>. If the +response is a thirty megabyte file, this is likely to be quite an +imposition on this process's memory usage. + +A notable alternative is to have LWP save the content to a file on disk, +instead of saving it up in memory. This is the syntax to use: + + $response = $ua->get($url, + ':content_file' => $filespec, + ); + +For example, + + $response = $ua->get('http://search.cpan.org/', + ':content_file' => '/tmp/sco.html' + ); + +When you use this C<:content_file> option, the C<$response> will have +all the normal header lines, but C<< $response->content >> will be +empty. + +Note that this ":content_file" option isn't supported under older +versions of LWP, so you should consider adding C<use LWP 5.66;> to check +the LWP version, if you think your program might run on systems with +older versions. + +If you need to be compatible with older LWP versions, then use +this syntax, which does the same thing: + + use HTTP::Request::Common; + $response = $ua->request( GET($url), $filespec ); + + +=for comment + ########################################################################## + + +=head1 SEE ALSO + +Remember, this article is just the most rudimentary introduction to +LWP -- to learn more about LWP and LWP-related tasks, you really +must read from the following: + +=over + +=item * + +L<LWP::Simple> -- simple functions for getting/heading/mirroring URLs + +=item * + +L<LWP> -- overview of the libwww-perl modules + +=item * + +L<LWP::UserAgent> -- the class for objects that represent "virtual browsers" + +=item * + +L<HTTP::Response> -- the class for objects that represent the response to +a LWP response, as in C<< $response = $browser->get(...) >> + +=item * + +L<HTTP::Message> and L<HTTP::Headers> -- classes that provide more methods +to HTTP::Response. + +=item * + +L<URI> -- class for objects that represent absolute or relative URLs + +=item * + +L<URI::Escape> -- functions for URL-escaping and URL-unescaping strings +(like turning "this & that" to and from "this%20%26%20that"). + +=item * + +L<HTML::Entities> -- functions for HTML-escaping and HTML-unescaping strings +(like turning "C. & E. BrontE<euml>" to and from "C. & E. Brontë") + +=item * + +L<HTML::TokeParser> and L<HTML::TreeBuilder> -- classes for parsing HTML + +=item * + +L<HTML::LinkExtor> -- class for finding links in HTML documents + +=item * + +The book I<Perl & LWP> by Sean M. Burke. O'Reilly & Associates, +2002. ISBN: 0-596-00178-9, L<http://oreilly.com/catalog/perllwp/>. The +whole book is also available free online: +L<http://lwp.interglacial.com>. + +=back + + +=head1 COPYRIGHT + +Copyright 2002, Sean M. Burke. You can redistribute this document and/or +modify it, but only under the same terms as Perl itself. + +=head1 AUTHOR + +Sean M. Burke C<sburke@cpan.org> + +=for comment + ########################################################################## + +=cut + +# End of Pod diff --git a/t/base/protocols.t b/t/base/protocols.t new file mode 100644 index 0000000..db5bbf4 --- /dev/null +++ b/t/base/protocols.t @@ -0,0 +1,17 @@ +use Test; +plan tests => 6; + +use LWP::UserAgent; +$ua = LWP::UserAgent->new(); + +$ua->protocols_forbidden(['hTtP']); +ok(scalar(@{$ua->protocols_forbidden()}), 1); +ok(@{$ua->protocols_forbidden()}[0], 'hTtP'); + +$response = $ua->get('http://www.cpan.org/'); +ok($response->is_error()); +ok(!$ua->is_protocol_supported('http')); +ok(!$ua->protocols_allowed()); + +$ua->protocols_forbidden(undef); +ok(!$ua->protocols_forbidden()); diff --git a/t/base/proxy.t b/t/base/proxy.t new file mode 100644 index 0000000..69a38ea --- /dev/null +++ b/t/base/proxy.t @@ -0,0 +1,17 @@ +#!perl -w + +use strict; +use Test; + +plan tests => 2; + +use LWP::UserAgent; + +for my $varname ( qw(ABSURDLY_NAMED_PROXY MY_PROXY) ) { + + $ENV{ $varname } = "foobar"; + + my $ua = LWP::UserAgent->new; + eval { $ua->env_proxy(); }; + ok($@, ""); +} diff --git a/t/base/ua.t b/t/base/ua.t new file mode 100644 index 0000000..ddc1897 --- /dev/null +++ b/t/base/ua.t @@ -0,0 +1,109 @@ +#!perl -w + +use strict; +use Test; + +plan tests => 35; + +use LWP::UserAgent; + +# Prevent environment from interfering with test: +delete $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}; +delete $ENV{HTTPS_CA_FILE}; +delete $ENV{HTTPS_CA_DIR}; +delete $ENV{PERL_LWP_SSL_CA_FILE}; +delete $ENV{PERL_LWP_SSL_CA_PATH}; +delete $ENV{PERL_LWP_ENV_PROXY}; + +my $ua = LWP::UserAgent->new; +my $clone = $ua->clone; + +ok($ua->agent =~ /^libwww-perl/); +ok(!defined $ua->proxy(ftp => "http://www.sol.no")); +ok($ua->proxy("ftp"), "http://www.sol.no"); + +my @a = $ua->proxy([qw(ftp http wais)], "http://proxy.foo.com"); +for (@a) { $_ = "undef" unless defined; } + +ok("@a", "http://www.sol.no undef undef"); +ok($ua->proxy("http"), "http://proxy.foo.com"); +ok(ref($ua->default_headers), "HTTP::Headers"); + +$ua->default_header("Foo" => "bar", "Multi" => [1, 2]); +ok($ua->default_headers->header("Foo"), "bar"); +ok($ua->default_header("Foo"), "bar"); + +# Try it +$ua->proxy(http => "loopback:"); +$ua->agent("foo/0.1"); + +ok($ua->get("http://www.example.com", x => "y")->content, <<EOT); +GET http://www.example.com +User-Agent: foo/0.1 +Foo: bar +Multi: 1 +Multi: 2 +X: y + +EOT + +ok(ref($clone->{proxy}), 'HASH'); + +ok($ua->proxy(http => undef), "loopback:"); +ok($ua->proxy('http'), undef); + +my $res = $ua->get("data:text/html,%3Chtml%3E%3Chead%3E%3Cmeta%20http-equiv%3D%22Content-Script-Type%22%20content%3D%22text%2Fjavascript%22%3E%3Cmeta%20http-equiv%3D%22Content-Style-Type%22%20content%3D%22text%2Fcss%22%3E%3C%2Fhead%3E%3C%2Fhtml%3E"); +ok($res->header("Content-Style-Type", "text/css")); +ok($res->header("Content-Script-Type", "text/javascript")); + +ok(join(":", $ua->ssl_opts), "verify_hostname"); +ok($ua->ssl_opts("verify_hostname"), 1); +ok($ua->ssl_opts(verify_hostname => 0), 1); +ok($ua->ssl_opts("verify_hostname"), 0); +ok($ua->ssl_opts(verify_hostname => undef), 0); +ok($ua->ssl_opts("verify_hostname"), undef); +ok(join(":", $ua->ssl_opts), ""); + +$ua = LWP::UserAgent->new(ssl_opts => {}); +ok($ua->ssl_opts("verify_hostname"), 1); + +$ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 0 }); +ok($ua->ssl_opts("verify_hostname"), 0); + +$ua = LWP::UserAgent->new(ssl_opts => { SSL_ca_file => 'cert.dat'}); +ok($ua->ssl_opts("verify_hostname"), 1); +ok($ua->ssl_opts("SSL_ca_file"), 'cert.dat'); + +$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 1; +$ua = LWP::UserAgent->new(); +ok($ua->ssl_opts("verify_hostname"), 1); + +$ua = LWP::UserAgent->new(ssl_opts => {}); +ok($ua->ssl_opts("verify_hostname"), 1); + +$ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 0 }); +ok($ua->ssl_opts("verify_hostname"), 0); + +$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0; +$ua = LWP::UserAgent->new(); +ok($ua->ssl_opts("verify_hostname"), 0); + +$ua = LWP::UserAgent->new(ssl_opts => {}); +ok($ua->ssl_opts("verify_hostname"), 0); + +$ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 1 }); +ok($ua->ssl_opts("verify_hostname"), 1); + +delete @ENV{grep /_proxy$/i, keys %ENV}; # clean out any proxy vars + +$ENV{http_proxy} = "http://example.com"; +$ua = LWP::UserAgent->new; +ok($ua->proxy('http'), undef); +$ua = LWP::UserAgent->new(env_proxy => 1);; +ok($ua->proxy('http'), "http://example.com"); + +$ENV{PERL_LWP_ENV_PROXY} = 1; +$ua = LWP::UserAgent->new(); +ok($ua->proxy('http'), "http://example.com"); +$ua = LWP::UserAgent->new(env_proxy => 0); +ok($ua->proxy('http'), undef); diff --git a/t/live/apache-http10.t b/t/live/apache-http10.t new file mode 100644 index 0000000..f6cf6bc --- /dev/null +++ b/t/live/apache-http10.t @@ -0,0 +1,16 @@ +#!perl -w + +use strict; +use Test; +plan tests => 1; + +use LWP::UserAgent; +my $ua = LWP::UserAgent->new; + +require HTTP::Request; +my $req = HTTP::Request->new(TRACE => "http://www.apache.org/"); +$req->protocol("HTTP/1.0"); +my $res = $ua->simple_request($req); +ok($res->content, qr/HTTP\/1.0/); + +$res->dump(prefix => "# "); diff --git a/t/live/jigsaw/auth-b.t b/t/live/jigsaw/auth-b.t new file mode 100644 index 0000000..cc2e24b --- /dev/null +++ b/t/live/jigsaw/auth-b.t @@ -0,0 +1,53 @@ +use strict; +use Test; + +plan tests => 5; + +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + +my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/Basic/"); + +my $res = $ua->request($req); + +#print $res->as_string; + +ok($res->code, 401); + +$req->authorization_basic('guest', 'guest'); +$res = $ua->simple_request($req); + +print $req->as_string, "\n"; + +#print $res->as_string; +ok($res->code, 200); +ok($res->content =~ /Your browser made it!/); + +{ + package MyUA; + use vars qw(@ISA); + @ISA = qw(LWP::UserAgent); + + my @try = (['foo', 'bar'], ['', ''], ['guest', ''], ['guest', 'guest']); + + sub get_basic_credentials { + my($self,$realm, $uri, $proxy) = @_; + #print "$realm/$uri/$proxy\n"; + my $p = shift @try; + #print join("/", @$p), "\n"; + return @$p; + } + +} + +$ua = MyUA->new(keep_alive => 1); + +$req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/Basic/"); +$res = $ua->request($req); + +#print $res->as_string; + +ok($res->content =~ /Your browser made it!/); +ok($res->header("Client-Response-Num"), 5); + diff --git a/t/live/jigsaw/auth-d.t b/t/live/jigsaw/auth-d.t new file mode 100644 index 0000000..8782613 --- /dev/null +++ b/t/live/jigsaw/auth-d.t @@ -0,0 +1,33 @@ +use strict; +use Test; + +plan tests => 2; + +use LWP::UserAgent; + +{ + package MyUA; + use vars qw(@ISA); + @ISA = qw(LWP::UserAgent); + + my @try = (['foo', 'bar'], ['', ''], ['guest', ''], ['guest', 'guest']); + + sub get_basic_credentials { + my($self,$realm, $uri, $proxy) = @_; + print "$realm:$uri:$proxy => "; + my $p = shift @try; + print join("/", @$p), "\n"; + return @$p; + } + +} + +my $ua = MyUA->new(keep_alive => 1); + +my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/Digest/"); +my $res = $ua->request($req); + +#print $res->as_string; + +ok($res->content =~ /Your browser made it!/); +ok($res->header("Client-Response-Num"), 5); diff --git a/t/live/jigsaw/chunk.t b/t/live/jigsaw/chunk.t new file mode 100644 index 0000000..703188c --- /dev/null +++ b/t/live/jigsaw/chunk.t @@ -0,0 +1,37 @@ +print "1..5\n"; + +use strict; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + +my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/ChunkedScript"); +my $res = $ua->request($req); + +print "not " unless $res->is_success && $res->content_type eq "text/plain"; +print "ok 1\n"; + +print "not " unless $res->header("Client-Transfer-Encoding") eq "chunked"; +print "ok 2\n"; + +for (${$res->content_ref}) { + s/\015?\012/\n/g; + /Below this line, is 1000 repeated lines of 0-9/ || die; + s/^.*?-----+\n//s; + + my @lines = split(/^/); + print "not " if @lines != 1000; + print "ok 3\n"; + + # check that all lines are the same + my $first = shift(@lines); + my $no_they_are_not; + for (@lines) { + $no_they_are_not++ if $_ ne $first; + } + print "not " if $no_they_are_not; + print "ok 4\n"; + + print "not " unless $first =~ /^\d+$/; + print "ok 5\n"; +} diff --git a/t/live/jigsaw/md5-get.t b/t/live/jigsaw/md5-get.t new file mode 100644 index 0000000..e30bdfb --- /dev/null +++ b/t/live/jigsaw/md5-get.t @@ -0,0 +1,29 @@ +print "1..2\n"; + +use strict; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + +my $res = $ua->get( + "http://jigsaw.w3.org/HTTP/h-content-md5.html", + "TE" => "deflate", +); + +use Digest::MD5 qw(md5_base64); +print "not " unless $res->header("Content-MD5") eq md5_base64($res->content) . "=="; +print "ok 1\n"; + +print $res->as_string; + +my $etag = $res->header("etag"); + +$res = $ua->get( + "http://jigsaw.w3.org/HTTP/h-content-md5.html", + "TE" => "deflate", + "If-None-Match" => $etag, +); +print $res->as_string; + +print "not " unless $res->code eq "304" && $res->header("Client-Response-Num") == 2; +print "ok 2\n"; diff --git a/t/live/jigsaw/md5.t b/t/live/jigsaw/md5.t new file mode 100644 index 0000000..edee340 --- /dev/null +++ b/t/live/jigsaw/md5.t @@ -0,0 +1,26 @@ +print "1..2\n"; + +use strict; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + +my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/h-content-md5.html"); +$req->header("TE", "deflate"); + +my $res = $ua->request($req); + +use Digest::MD5 qw(md5_base64); +print "not " unless $res->header("Content-MD5") eq md5_base64($res->content) . "=="; +print "ok 1\n"; + +print $res->as_string; + +my $etag = $res->header("etag"); +$req->header("If-None-Match" => $etag); + +$res = $ua->request($req); +print $res->as_string; + +print "not " unless $res->code eq "304" && $res->header("Client-Response-Num") == 2; +print "ok 2\n"; diff --git a/t/live/jigsaw/neg-get.t b/t/live/jigsaw/neg-get.t new file mode 100644 index 0000000..eccd9d3 --- /dev/null +++ b/t/live/jigsaw/neg-get.t @@ -0,0 +1,16 @@ +print "1..1\n"; + +use strict; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + +my $res = $ua->get( + "http://jigsaw.w3.org/HTTP/neg", + Connection => "close", +); + +print $res->as_string, "\n"; + +print "not " unless $res->code == 300; +print "ok 1\n"; diff --git a/t/live/jigsaw/neg.t b/t/live/jigsaw/neg.t new file mode 100644 index 0000000..e33a2a8 --- /dev/null +++ b/t/live/jigsaw/neg.t @@ -0,0 +1,15 @@ +print "1..1\n"; + +use strict; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + +my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/neg"); +$req->header(Connection => "close"); +my $res = $ua->request($req); + +print $res->as_string, "\n"; + +print "not " unless $res->code == 300; +print "ok 1\n"; diff --git a/t/live/jigsaw/te.t b/t/live/jigsaw/te.t new file mode 100644 index 0000000..a5a7c0e --- /dev/null +++ b/t/live/jigsaw/te.t @@ -0,0 +1,33 @@ +#!perl -w + +print "1..4\n"; + +use strict; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new(keep_alive => 1); + + +my $content; +my $testno = 1; + +for my $te (undef, "", "deflate", "gzip", "trailers, deflate;q=0.4, identity;q=0.1") { + my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/TE/foo.txt"); + if (defined $te) { + $req->header(TE => $te); + $req->header(Connection => "TE"); + } + print $req->as_string; + + my $res = $ua->request($req); + if (defined $content) { + print "not " unless $content eq $res->content; + print "ok $testno\n\n"; + $testno++; + } + else { + $content = $res->content; + } + $res->content(""); + print $res->as_string; +} diff --git a/t/live/online.t b/t/live/online.t new file mode 100644 index 0000000..750e94b --- /dev/null +++ b/t/live/online.t @@ -0,0 +1,13 @@ +#!perl -w + +use strict; +use Test; +plan tests => 2; + +use LWP::UserAgent; +my $ua = LWP::UserAgent->new; + +ok $ua->is_online; + +$ua->protocols_allowed([]); +ok !$ua->is_online; diff --git a/t/local/autoload-get.t b/t/local/autoload-get.t new file mode 100644 index 0000000..5e9f2e6 --- /dev/null +++ b/t/local/autoload-get.t @@ -0,0 +1,26 @@ +# +# See if autoloading of protocol schemes work +# + +print "1..1\n"; + +require LWP::UserAgent; +# note no LWP::Protocol::file; + +$url = "file:."; + +require URI; +print "Trying to fetch '" . URI->new($url)->file . "'\n"; + +my $ua = new LWP::UserAgent; # create a useragent to test +$ua->timeout(30); # timeout in seconds + +my $response = $ua->get($url); +if ($response->is_success) { + print "ok 1\n"; + print $response->as_string; +} +else { + print "not ok 1\n"; + print $response->error_as_HTML; +} diff --git a/t/local/autoload.t b/t/local/autoload.t new file mode 100644 index 0000000..0f77db0 --- /dev/null +++ b/t/local/autoload.t @@ -0,0 +1,22 @@ +# +# See if autoloading of protocol schemes work +# + +use Test; +plan tests => 1; + +require LWP::UserAgent; +# note no LWP::Protocol::file; + +$url = "file:."; + +require URI; +print "Trying to fetch '" . URI->new($url)->file . "'\n"; + +my $ua = new LWP::UserAgent; # create a useragent to test +$ua->timeout(30); # timeout in seconds + +my $request = HTTP::Request->new(GET => $url); + +my $response = $ua->request($request); +ok($response->is_success); diff --git a/t/local/get.t b/t/local/get.t new file mode 100644 index 0000000..7c019c9 --- /dev/null +++ b/t/local/get.t @@ -0,0 +1,86 @@ +# +# Test retrieving a file with a 'file://' URL, +# + +if ($^O eq "MacOS") { + print "1..0\n"; + exit; +} + + +# First locate some suitable tmp-dir. We need an absolute path. +$TMPDIR = undef; +for ("/tmp/", "/var/tmp", "/usr/tmp", "/local/tmp") { + if (open(TEST, ">$_/test-$$")) { + close(TEST); + unlink("$_/test-$$"); + $TMPDIR = $_; + last; + } +} +$TMPDIR ||= $ENV{TEMP} if $^O eq 'MSWin32'; +unless ($TMPDIR) { + # Can't run any tests + print "1..0\n"; + print "ok 1\n"; + exit; +} +$TMPDIR =~ tr|\\|/|; + +use Test; +plan tests => 2; + +use LWP::Simple; +require LWP::Protocol::file; + +my $orig = "$TMPDIR/lwp-orig-$$"; # local file +my $copy = "$TMPDIR/lwp-copy-$$"; # downloaded copy + +# First we create the original +open(OUT, ">$orig") or die "Cannot open $orig: $!"; +binmode(OUT); +for (1..5) { + print OUT "This is line $_ of $orig\n"; +} +close(OUT); + + +# Then we make a test using getprint(), so we need to capture stdout +open (OUT, ">$copy") or die "Cannot open $copy: $!"; +select(OUT); + +# do the retrieval +getprint("file://localhost" . ($orig =~ m|^/| ? $orig : "/$orig")); + +close(OUT); +select(STDOUT); + +# read and compare the files +open(IN, $orig) or die "Cannot open '$orig': $!"; +undef($/); +$origtext = <IN>; +close(IN); +open(IN, $copy) or die "Cannot open '$copy': $!"; +undef($/); +$copytext = <IN>; +close(IN); + +unlink($copy); + +ok($copytext, $origtext); + + +# Test getstore() function + +getstore("file:$orig", $copy); + +# Take a look at the new copy +open(IN, $copy) or die "Cannot open '$copy': $!"; +undef($/); +$copytext = <IN>; +close(IN); + +unlink($orig); +unlink($copy); + +ok($copytext, $origtext); diff --git a/t/local/http.t b/t/local/http.t new file mode 100644 index 0000000..917b04d --- /dev/null +++ b/t/local/http.t @@ -0,0 +1,498 @@ +use FindBin qw($Bin); +if ($^O eq "MacOS") { + print "1..0\n"; + exit(0); +} + +if (0 != system($^X, "$Bin/../../talk-to-ourself")) { + print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n"; + exit; +} + +delete $ENV{PERL_LWP_ENV_PROXY}; + +$| = 1; # autoflush + +require IO::Socket; # make sure this work before we try to make a HTTP::Daemon + +# First we make ourself a daemon in another process +my $D = shift || ''; +if ($D eq 'daemon') { + + require HTTP::Daemon; + + my $d = HTTP::Daemon->new(Timeout => 10); + + print "Please to meet you at: <URL:", $d->url, ">\n"; + open(STDOUT, $^O eq 'VMS'? ">nl: " : ">/dev/null"); + + while ($c = $d->accept) { + $r = $c->get_request; + if ($r) { + my $p = ($r->uri->path_segments)[1]; + my $func = lc("httpd_" . $r->method . "_$p"); + if (defined &$func) { + &$func($c, $r); + } + else { + $c->send_error(404); + } + } + $c = undef; # close connection + } + print STDERR "HTTP Server terminated\n"; + exit; +} +else { + use Config; + my $perl = $Config{'perlpath'}; + $perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i; + open(DAEMON, "$perl $0 daemon |") or die "Can't exec daemon: $!"; +} + +use Test::More; +plan tests => 63; + +my $greeting = <DAEMON>; +$greeting =~ /(<[^>]+>)/; + +require URI; +my $base = URI->new($1); +sub url { + my $u = URI->new(@_); + $u = $u->abs($_[1]) if @_ > 1; + $u->as_string; +} + +print "Will access HTTP server at $base\n"; + +require LWP::UserAgent; +require HTTP::Request; +$ua = new LWP::UserAgent; +$ua->agent("Mozilla/0.01 " . $ua->agent); +$ua->from('gisle@aas.no'); + +#---------------------------------------------------------------- +print "Bad request...\n"; +$req = new HTTP::Request GET => url("/not_found", $base); +$req->header(X_Foo => "Bar"); +$res = $ua->request($req); + +ok($res->is_error, 'is_error'); +is($res->code, 404, 'response code 404'); +like($res->message, qr/not\s+found/i, '404 message'); +# we also expect a few headers +ok($res->server); +ok($res->date); + +#---------------------------------------------------------------- +print "Simple echo...\n"; +sub httpd_get_echo +{ + my($c, $req) = @_; + $c->send_basic_header(200); + print $c "Content-Type: message/http\015\012"; + $c->send_crlf; + print $c $req->as_string; +} + +$req = new HTTP::Request GET => url("/echo/path_info?query", $base); +$req->push_header(Accept => 'text/html'); +$req->push_header(Accept => 'text/plain; q=0.9'); +$req->push_header(Accept => 'image/*'); +$req->push_header(':foo_bar' => 1); +$req->if_modified_since(time - 300); +$req->header(Long_text => 'This is a very long header line +which is broken between +more than one line.'); +$req->header(X_Foo => "Bar"); + +$res = $ua->request($req); +#print $res->as_string; + +ok($res->is_success); +is($res->code, 200, 'status code 200'); +is($res->message, "OK", 'message OK'); + +$_ = $res->content; +@accept = /^Accept:\s*(.*)/mg; + +like($_, qr/^From:\s*gisle\@aas\.no\n/m); +like($_, qr/^Host:/m); +is(@accept, 3, '3 items in accept'); +like($_, qr/^Accept:\s*text\/html/m); +like($_, qr/^Accept:\s*text\/plain/m); +like($_, qr/^Accept:\s*image\/\*/m); +like($_, qr/^If-Modified-Since:\s*\w{3},\s+\d+/m); +like($_, qr/^Long-Text:\s*This.*broken between/m); +like($_, qr/^Foo-Bar:\s*1\n/m); +like($_, qr/^X-Foo:\s*Bar\n/m); +like($_, qr/^User-Agent:\s*Mozilla\/0.01/m); + +# Try it with the higher level 'get' interface +$res = $ua->get(url("/echo/path_info?query", $base), + Accept => 'text/html', + Accept => 'text/plain; q=0.9', + Accept => 'image/*', + X_Foo => "Bar", +); +#$res->dump; +is($res->code, 200, 'response code 200'); + +#---------------------------------------------------------------- +print "UserAgent->put...\n"; +sub httpd_put_echo +{ + my($c, $req) = @_; + $c->send_basic_header(200); + print $c "Content-Type: message/http\015\012"; + $c->send_crlf; + print $c $req->as_string; +} +ok($res->content, qr/^From: gisle\@aas.no$/m); +# Try it with the higher level 'get' interface +$res = $ua->put(url("/echo/path_info?query", $base), + Accept => 'text/html', + Accept => 'text/plain; q=0.9', + Accept => 'image/*', + X_Foo => "Bar", +); +#$res->dump; +is($res->code, 200, 'response code 200'); +ok($res->content, qr/^From: gisle\@aas.no$/m); + +#---------------------------------------------------------------- +print "UserAgent->delete...\n"; +sub httpd_delete_echo +{ + my($c, $req) = @_; + $c->send_basic_header(200); + print $c "Content-Type: message/http\015\012"; + $c->send_crlf; + print $c $req->as_string; +} +ok($res->content, qr/^From: gisle\@aas.no$/m); +# Try it with the higher level 'get' interface +$res = $ua->delete(url("/echo/path_info?query", $base), + Accept => 'text/html', + Accept => 'text/plain; q=0.9', + Accept => 'image/*', + X_Foo => "Bar", +); +#$res->dump; +is($res->code, 200, 'response code 200'); +ok($res->content, qr/^From: gisle\@aas.no$/m); + +#---------------------------------------------------------------- +print "Send file...\n"; + +my $file = "test-$$.html"; +open(FILE, ">$file") or die "Can't create $file: $!"; +binmode FILE or die "Can't binmode $file: $!"; +print FILE <<EOT; +<html><title>En prøve</title> +<h1>Dette er en testfil</h1> +Jeg vet ikke hvor stor fila behøver å være heller, men dette +er sikkert nok i massevis. +EOT +close(FILE); + +sub httpd_get_file +{ + my($c, $r) = @_; + my %form = $r->uri->query_form; + my $file = $form{'name'}; + $c->send_file_response($file); + unlink($file) if $file =~ /^test-/; +} + +$req = new HTTP::Request GET => url("/file?name=$file", $base); +$res = $ua->request($req); +#print $res->as_string; + +ok($res->is_success); +ok($res->content_type, 'text/html'); +is($res->content_length, 147, '147 content length'); +ok($res->title, 'En prøve'); +ok($res->content, qr/å være/); + +# A second try on the same file, should fail because we unlink it +$res = $ua->request($req); +#print $res->as_string; +ok($res->is_error); +is($res->code, 404, 'response code 404'); # not found + +# Then try to list current directory +$req = new HTTP::Request GET => url("/file?name=.", $base); +$res = $ua->request($req); +#print $res->as_string; +is($res->code, 501, 'response code 501'); # NYI + + +#---------------------------------------------------------------- +print "Check redirect...\n"; +sub httpd_get_redirect +{ + my($c) = @_; + $c->send_redirect("/echo/redirect"); +} + +$req = new HTTP::Request GET => url("/redirect/foo", $base); +$res = $ua->request($req); +#print $res->as_string; + +ok($res->is_success); +ok($res->content, qr|/echo/redirect|); +ok($res->previous->is_redirect); +is($res->previous->code, 301, 'response code 301'); + +# Let's test a redirect loop too +sub httpd_get_redirect2 { shift->send_redirect("/redirect3/") } +sub httpd_get_redirect3 { shift->send_redirect("/redirect2/") } + +$req->uri(url("/redirect2", $base)); +$ua->max_redirect(5); +$res = $ua->request($req); +#print $res->as_string; +ok($res->is_redirect); +ok($res->header("Client-Warning"), qr/loop detected/i); +is($res->redirects, 5, '5 max redirects'); + +$ua->max_redirect(0); +$res = $ua->request($req); +is($res->previous, undef, 'undefined previous'); +is($res->redirects, 0, 'zero redirects'); +$ua->max_redirect(5); + +#---------------------------------------------------------------- +print "Check basic authorization...\n"; +sub httpd_get_basic +{ + my($c, $r) = @_; + #print STDERR $r->as_string; + my($u,$p) = $r->authorization_basic; + if (defined($u) && $u eq 'ok 12' && $p eq 'xyzzy') { + $c->send_basic_header(200); + print $c "Content-Type: text/plain"; + $c->send_crlf; + $c->send_crlf; + $c->print("$u\n"); + } + else { + $c->send_basic_header(401); + $c->print("WWW-Authenticate: Basic realm=\"libwww-perl\"\015\012"); + $c->send_crlf; + } +} + +{ + package MyUA; @ISA=qw(LWP::UserAgent); + sub get_basic_credentials { + my($self, $realm, $uri, $proxy) = @_; + if ($realm eq "libwww-perl" && $uri->rel($base) eq "basic") { + return ("ok 12", "xyzzy"); + } + else { + return undef; + } + } +} +$req = new HTTP::Request GET => url("/basic", $base); +$res = MyUA->new->request($req); +#print $res->as_string; + +ok($res->is_success); +#print $res->content; + +# Let's try with a $ua that does not pass out credentials +$res = $ua->request($req); +is($res->code, 401, 'respone code 401'); + +# Let's try to set credentials for this realm +$ua->credentials($req->uri->host_port, "libwww-perl", "ok 12", "xyzzy"); +$res = $ua->request($req); +ok($res->is_success); + +# Then illegal credentials +$ua->credentials($req->uri->host_port, "libwww-perl", "user", "passwd"); +$res = $ua->request($req); +is($res->code, 401, 'response code 401'); + + +#---------------------------------------------------------------- +print "Check digest authorization...\n"; +sub httpd_get_digest +{ + my($c, $r) = @_; +# print STDERR $r->as_string; + my $auth = $r->authorization; + my %auth_params; + if ( defined($auth) && $auth =~ /^Digest\s+(.*)$/ ) { + %auth_params = map { split /=/ } split /,\s*/, $1; + } + if ( %auth_params && + $auth_params{username} eq "\"ok 23\"" && + $auth_params{realm} eq "\"libwww-perl-digest\"" && + $auth_params{qop} eq "auth" && + $auth_params{algorithm} eq "\"MD5\"" && + $auth_params{uri} eq "\"/digest\"" && + $auth_params{nonce} eq "\"12345\"" && + $auth_params{nc} eq "00000001" && + defined($auth_params{cnonce}) && + defined($auth_params{response}) + ) { + $c->send_basic_header(200); + print $c "Content-Type: text/plain"; + $c->send_crlf; + $c->send_crlf; + $c->print("ok\n"); + } + else { + $c->send_basic_header(401); + $c->print("WWW-Authenticate: Digest realm=\"libwww-perl-digest\", nonce=\"12345\", qop=auth\015\012"); + $c->send_crlf; + } +} + +{ + package MyUA2; @ISA=qw(LWP::UserAgent); + sub get_basic_credentials { + my($self, $realm, $uri, $proxy) = @_; + if ($realm eq "libwww-perl-digest" && $uri->rel($base) eq "digest") { + return ("ok 23", "xyzzy"); + } + else { + return undef; + } + } +} +$req = new HTTP::Request GET => url("/digest", $base); +$res = MyUA2->new->request($req); +#print STDERR $res->as_string; + +ok($res->is_success); +#print $res->content; + +# Let's try with a $ua that does not pass out credentials +$ua->{basic_authentication}=undef; +$res = $ua->request($req); +is($res->code, 401, 'respone code 401'); + +# Let's try to set credentials for this realm +$ua->credentials($req->uri->host_port, "libwww-perl-digest", "ok 23", "xyzzy"); +$res = $ua->request($req); +#print STDERR $res->as_string; +ok($res->is_success); + +# Then illegal credentials +$ua->credentials($req->uri->host_port, "libwww-perl-digest", "user2", "passwd"); +$res = $ua->request($req); +is($res->code, 401, 'response code 401'); + + + +#---------------------------------------------------------------- +print "Check proxy...\n"; +sub httpd_get_proxy +{ + my($c,$r) = @_; + if ($r->method eq "GET" and + $r->uri->scheme eq "ftp") { + $c->send_basic_header(200); + $c->send_crlf; + } + else { + $c->send_error; + } +} + +$ua->proxy(ftp => $base); +$req = new HTTP::Request GET => "ftp://ftp.perl.com/proxy"; +$res = $ua->request($req); +#print $res->as_string; +ok($res->is_success); + +#---------------------------------------------------------------- +print "Check POSTing...\n"; +sub httpd_post_echo +{ + my($c,$r) = @_; + $c->send_basic_header; + $c->print("Content-Type: text/plain"); + $c->send_crlf; + $c->send_crlf; + + # Do it the hard way to test the send_file + open(TMP, ">tmp$$") || die; + binmode(TMP); + print TMP $r->as_string; + close(TMP) || die; + + $c->send_file("tmp$$"); + + unlink("tmp$$"); +} + +$req = new HTTP::Request POST => url("/echo/foo", $base); +$req->content_type("application/x-www-form-urlencoded"); +$req->content("foo=bar&bar=test"); +$res = $ua->request($req); +#print $res->as_string; + +$_ = $res->content; +ok($res->is_success); +ok($_, qr/^Content-Length:\s*16$/mi); +ok($_, qr/^Content-Type:\s*application\/x-www-form-urlencoded$/mi); +ok($_, qr/^foo=bar&bar=test$/m); + +$req = HTTP::Request->new(POST => url("/echo/foo", $base)); +$req->content_type("multipart/form-data"); +$req->add_part(HTTP::Message->new(["Content-Type" => "text/plain"], "Hi\n")); +$req->add_part(HTTP::Message->new(["Content-Type" => "text/plain"], "there\n")); +$res = $ua->request($req); +#print $res->as_string; +ok($res->is_success); +ok($res->content =~ /^Content-Type: multipart\/form-data; boundary=/m); + +#---------------------------------------------------------------- +print "Check partial content response...\n"; +sub httpd_get_partial +{ + my($c) = @_; + $c->send_basic_header(206); + print $c "Content-Type: image/jpeg\015\012"; + $c->send_crlf; + print $c "some fake JPEG content"; + +} + +{ + $req = HTTP::Request->new( GET => url("/partial", $base) ); + $res = $ua->request($req); + ok($res->is_success); # "a 206 response is considered successful" +} +{ + $ua->max_size(3); + $req = HTTP::Request->new( GET => url("/partial", $base) ); + $res = $ua->request($req); + ok($res->is_success); # "a 206 response is considered successful" + # Put max_size back how we found it. + $ua->max_size(undef); + ok($res->as_string, qr/Client-Aborted: max_size/); # Client-Aborted is returned when max_size is given +} + + +#---------------------------------------------------------------- +print "Terminating server...\n"; +sub httpd_get_quit +{ + my($c) = @_; + $c->send_error(503, "Bye, bye"); + exit; # terminate HTTP server +} + +$req = new HTTP::Request GET => url("/quit", $base); +$res = $ua->request($req); + +is($res->code, 503, 'response code is 503'); +ok($res->content, qr/Bye, bye/); diff --git a/t/local/protosub.t b/t/local/protosub.t new file mode 100644 index 0000000..c271846 --- /dev/null +++ b/t/local/protosub.t @@ -0,0 +1,70 @@ +#!perl + +use strict; +use Test; +plan tests => 6; + +# This test tries to make a custom protocol implementation by +# subclassing of LWP::Protocol. + + +use LWP::UserAgent (); +use LWP::Protocol (); + +LWP::Protocol::implementor(http => 'myhttp'); + +my $ua = LWP::UserAgent->new; +$ua->proxy('ftp' => "http://www.sn.no/"); + +my $req = HTTP::Request->new(GET => 'ftp://foo/'); +$req->header(Cookie => "perl=cool"); + +my $res = $ua->request($req); + +#print $res->as_string; +ok($res->code, 200); +ok($res->content, "Howdy\n"); +exit; + + +#---------------------------------- +package myhttp; + +use Test qw(ok); + +BEGIN { + use vars qw(@ISA); + @ISA=qw(LWP::Protocol); +} + +sub new +{ + my $class = shift; + print "CTOR: $class->new(@_)\n"; + my($prot) = @_; + ok($prot, "http"); + my $self = $class->SUPER::new(@_); + for (keys %$self) { + my $v = $self->{$_}; + $v = "<undef>" unless defined($v); + print "$_: $v\n"; + } + $self; +} + +sub request +{ + my $self = shift; + my($request, $proxy, $arg, $size, $timeout) = @_; + #print $request->as_string; + + ok($proxy, "http://www.sn.no/"); + ok($request->uri, "ftp://foo/"); + ok($request->header("cookie"), "perl=cool"); + + my $res = HTTP::Response->new(200 => "OK"); + $res->content_type("text/plain"); + $res->date(time); + $self->collect_once($arg, $res, "Howdy\n"); + $res; +} diff --git a/t/net/cgi-bin/moved b/t/net/cgi-bin/moved new file mode 100755 index 0000000..563a150 --- /dev/null +++ b/t/net/cgi-bin/moved @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "Location: http://$SERVER_NAME:$SERVER_PORT/" +echo diff --git a/t/net/cgi-bin/nph-slowdata b/t/net/cgi-bin/nph-slowdata new file mode 100755 index 0000000..01f012a --- /dev/null +++ b/t/net/cgi-bin/nph-slowdata @@ -0,0 +1,26 @@ +#!/usr/local/bin/perl + +# This script outputs some data slowly. It can be used to check that +# pipelined processing of response content really works. We use syswrite +# so that the script will notice when the connection is broken. + +out("HTTP/1.0 200 OK\r +Content-Type: text/plain\r +\r +"); + +for (1..5) { + out("The number is now $_\n"); + sleep(1); +} +exit; + + +sub out +{ + my $data = shift; + my $l = length $data; + if (syswrite(STDOUT, $data, $l) != $l) { + exit 1; + } +} diff --git a/t/net/cgi-bin/slowread b/t/net/cgi-bin/slowread new file mode 100755 index 0000000..f200239 --- /dev/null +++ b/t/net/cgi-bin/slowread @@ -0,0 +1,31 @@ +#!/usr/local/bin/perl + +# You might post large amount of data to this script. It will read +# it slowly. + +{ local($!) = 1; print "Content-Type: text/plain\n\n"; } + +$len = $ENV{CONTENT_LENGTH}; + +unless ($len) { + system "env"; + exit; +} + +$size = 20; # chunk size + +$content = ''; +$bytes = 0; + +sleep(1); +while ($len > 0) { + $n = sysread(STDIN, $b, $size); + last if $n <= 0; + $len -= $n; + $bytes += $n; + $content .= $b; + sleep(1); +} +print "$bytes bytes read\n"; + + diff --git a/t/net/cgi-bin/test b/t/net/cgi-bin/test new file mode 100755 index 0000000..6a1535c --- /dev/null +++ b/t/net/cgi-bin/test @@ -0,0 +1,28 @@ +#!/usr/local/bin/perl + +$| = 1; +print "Content-type: text/plain + +"; + +if (@ARGV) { + print "ARGS: "; + print join(", ", map { $_ = qq{"$_"} } @ARGV); + print "\n\n"; +} else { + print "No command line arguments passed to script\n\n"; +} + +while (($key,$val) = each %ENV) { + print "$key=$val\n"; +} + +if ($ENV{CONTENT_LENGTH}) { + $len = $ENV{CONTENT_LENGTH}; + while ($len) { + $n = sysread(STDIN, $content, $len, length $content); + last unless defined $n; + $len -= $n; + } + print "\nContent\n-------\n$content"; +} diff --git a/t/net/cgi-bin/timeout b/t/net/cgi-bin/timeout new file mode 100755 index 0000000..cbb8b72 --- /dev/null +++ b/t/net/cgi-bin/timeout @@ -0,0 +1,7 @@ +#!/bin/sh + +sleep 20; + +echo "Content-type: text/plain" +echo +echo "Test" diff --git a/t/net/config.pl.dist b/t/net/config.pl.dist new file mode 100644 index 0000000..f826e35 --- /dev/null +++ b/t/net/config.pl.dist @@ -0,0 +1,10 @@ +package net; + +# Configure these for your local system +$httpserver = "localhost:80"; +$cgidir = "/cgi-bin/lwp"; + +# Used for proxy test +$ftp_proxy = "http://localhost/"; + +1; diff --git a/t/net/http-get.t b/t/net/http-get.t new file mode 100644 index 0000000..d9eb42c --- /dev/null +++ b/t/net/http-get.t @@ -0,0 +1,51 @@ +#!/usr/local/bin/perl -w +# +# Check GET via HTTP. +# + +use FindBin qw($Bin); +if (!-e "$Bin/config.pl") { + print "1..0 # SKIP no net config file"; + exit 0; +} + +require "$Bin/config.pl"; +require HTTP::Request; +require LWP::UserAgent; + +print "1..2\n"; + +my $ua = new LWP::UserAgent; # create a useragent to test + +$netloc = $net::httpserver; +$script = $net::cgidir . "/test"; + +$url = "http://$netloc$script?query"; + +my $request = new HTTP::Request('GET', $url); + +print "GET $url\n\n"; + +my $response = $ua->request($request, undef, undef); + +my $str = $response->as_string; + +print "$str\n"; + +if ($response->is_success and $str =~ /^REQUEST_METHOD=GET$/m) { + print "ok 1\n"; +} +else { + print "not ok 1\n"; +} + +if ($str =~ /^QUERY_STRING=query$/m) { + print "ok 2\n"; +} +else { + print "not ok 2\n"; +} + +# avoid -w warning +$dummy = $net::httpserver; +$dummy = $net::cgidir; diff --git a/t/net/http-post.t b/t/net/http-post.t new file mode 100644 index 0000000..7d4ec68 --- /dev/null +++ b/t/net/http-post.t @@ -0,0 +1,52 @@ +#!/usr/local/bin/perl -w +# +# Check POST via HTTP. +# + +use FindBin qw($Bin); +if (!-e "$Bin/config.pl") { + print "1..0 # SKIP no net config file"; + exit 0; +} + +require "$Bin/config.pl"; +require HTTP::Request; +require LWP::UserAgent; + +print "1..2\n"; + +$netloc = $net::httpserver; +$script = $net::cgidir . "/test"; + +my $ua = new LWP::UserAgent; # create a useragent to test + +$url = "http://$netloc$script"; + +my $form = 'searchtype=Substring'; + +my $request = new HTTP::Request('POST', $url, undef, $form); +$request->header('Content-Type', 'application/x-www-form-urlencoded'); + +my $response = $ua->request($request, undef, undef); + +my $str = $response->as_string; + +print "$str\n"; + +if ($response->is_success and $str =~ /^REQUEST_METHOD=POST$/m) { + print "ok 1\n"; +} +else { + print "not ok 1\n"; +} + +if ($str =~ /^CONTENT_LENGTH=(\d+)$/m && $1 == length($form)) { + print "ok 2\n"; +} +else { + print "not ok 2\n"; +} + +# avoid -w warning +$dummy = $net::httpserver; +$dummy = $net::cgidir; diff --git a/t/net/http-timeout.t b/t/net/http-timeout.t new file mode 100644 index 0000000..627bb3b --- /dev/null +++ b/t/net/http-timeout.t @@ -0,0 +1,46 @@ +# +# Check timeouts via HTTP. +# + +use FindBin qw($Bin); +if (!-e "$Bin/config.pl") { + print "1..0 # SKIP no net config file"; + exit 0; +} + +require "$Bin/config.pl"; +require HTTP::Request; +require LWP::UserAgent; + +print "1..1\n"; + +my $ua = new LWP::UserAgent; # create a useragent to test + +$ua->timeout(4); + +$netloc = $net::httpserver; +$script = $net::cgidir . "/timeout"; + +$url = "http://$netloc$script"; + +my $request = new HTTP::Request('GET', $url); + +print $request->as_string; + +my $response = $ua->request($request, undef); + +my $str = $response->as_string; + +print "$str\n"; + +if ($response->is_error and + $str =~ /timeout/) { + print "ok 1\n"; +} +else { + print "nok ok 1\n"; +} + +# avoid -w warning +$dummy = $net::httpserver; +$dummy = $net::cgidir; diff --git a/t/net/mirror.t b/t/net/mirror.t new file mode 100644 index 0000000..f5a53eb --- /dev/null +++ b/t/net/mirror.t @@ -0,0 +1,41 @@ +# +# Test mirroring a file +# + +use FindBin qw($Bin); +if (!-e "$Bin/config.pl") { + print "1..0 # SKIP no net config file"; + exit 0; +} + +require "$Bin/config.pl"; +require LWP::UserAgent; +require HTTP::Status; + +print "1..2\n"; + +my $ua = new LWP::UserAgent; # create a useragent to test + +my $url = "http://$net::httpserver/"; +my $copy = "lwp-test-$$"; # downloaded copy + +my $response = $ua->mirror($url, $copy); + +if ($response->code == &HTTP::Status::RC_OK) { + print "ok 1\n"; +} +else { + print "not ok 1\n"; +} + +# OK, so now do it again, should get Not-Modified +$response = $ua->mirror($url, $copy); +if ($response->code == &HTTP::Status::RC_NOT_MODIFIED) { + print "ok 2\n"; +} +else { + print "not ok 2\n"; +} +unlink($copy); + +$net::httpserver = $net::httpserver; # avoid -w warning diff --git a/t/net/moved.t b/t/net/moved.t new file mode 100644 index 0000000..85795a6 --- /dev/null +++ b/t/net/moved.t @@ -0,0 +1,38 @@ +#!/usr/local/bin/perl -w +# + +use FindBin qw($Bin); +if (!-e "$Bin/config.pl") { + print "1..0 # SKIP no net config file"; + exit 0; +} + +require "$Bin/config.pl"; +require LWP::UserAgent; + +print "1..1\n"; + +$url = "http://$net::httpserver$net::cgidir/moved"; + +my $ua = new LWP::UserAgent; # create a useragent to test +$ua->timeout(30); # timeout in seconds + +my $request = new HTTP::Request('GET', $url); + +print $request->as_string; + +my $response = $ua->request($request, undef, undef); + +print $response->as_string, "\n"; + +if ($response->is_success) { + print "ok 1\n"; +} +else { + print "not ok 1\n"; +} + + +# avoid -w warning +$dummy = $net::httpserver; +$dummy = $net::cgidir; diff --git a/t/net/proxy.t b/t/net/proxy.t new file mode 100644 index 0000000..22582b6 --- /dev/null +++ b/t/net/proxy.t @@ -0,0 +1,42 @@ +#!/usr/local/bin/perl -w +# +# Test retrieving a file with a 'ftp://' URL, +# via a HTTP proxy. +# + +use FindBin qw($Bin); +if (!-e "$Bin/config.pl") { + print "1..0 # SKIP no net config file"; + exit 0; +} + +require "$Bin/config.pl"; + +print "1..1\n"; + +unless (defined $net::ftp_proxy) { + print "not ok 1\n"; + exit 0; +} + +require HTTP::Request; +require LWP::UserAgent; + +my $ua = new LWP::UserAgent; # create a useragent to test + +$ua->proxy('ftp', $net::ftp_proxy); + +my $url = 'ftp://ftp.uninett.no/'; + +my $request = new HTTP::Request('GET', $url); + +my $response = $ua->request($request, undef, undef); + +my $str = $response->as_string; + +if ($response->is_success) { + print "ok 1\n"; +} +else { + print "not ok 1\n"; +} diff --git a/t/robot/ua-get.t b/t/robot/ua-get.t new file mode 100644 index 0000000..63bb963 --- /dev/null +++ b/t/robot/ua-get.t @@ -0,0 +1,159 @@ +use FindBin qw($Bin); +if($^O eq "MacOS") { + print "1..0\n"; + exit(0); +} + +if (0 != system($^X, "$Bin/../../talk-to-ourself")) { + print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n"; + exit; +} + +delete $ENV{PERL_LWP_ENV_PROXY}; + +$| = 1; # autoflush +require IO::Socket; # make sure this work before we try to make a HTTP::Daemon + +# First we make ourself a daemon in another process +my $D = shift || ''; +if ($D eq 'daemon') { + + require HTTP::Daemon; + + my $d = new HTTP::Daemon Timeout => 10; + + print "Please to meet you at: <URL:", $d->url, ">\n"; + open(STDOUT, $^O eq 'MSWin32' ? ">nul" : $^O eq 'VMS' ? ">NL:" : ">/dev/null"); + + while ($c = $d->accept) { + $r = $c->get_request; + if ($r) { + my $p = ($r->uri->path_segments)[1]; + $p =~ s/\W//g; + my $func = lc("httpd_" . $r->method . "_$p"); + #print STDERR "Calling $func...\n"; + if (defined &$func) { + &$func($c, $r); + } + else { + $c->send_error(404); + } + } + $c = undef; # close connection + } + print STDERR "HTTP Server terminated\n"; + exit; +} +else { + use Config; + my $perl = $Config{'perlpath'}; + $perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i; + open(DAEMON , "$perl $0 daemon |") or die "Can't exec daemon: $!"; +} + +print "1..8\n"; + + +$greating = <DAEMON>; +$greating =~ /(<[^>]+>)/; + +require URI; +my $base = URI->new($1); +sub url { + my $u = URI->new(@_); + $u = $u->abs($_[1]) if @_ > 1; + $u->as_string; +} + +print "Will access HTTP server at $base\n"; + +require LWP::RobotUA; +require HTTP::Request; +$ua = new LWP::RobotUA 'lwp-spider/0.1', 'gisle@aas.no'; +$ua->delay(0.05); # rather quick robot + +#---------------------------------------------------------------- +sub httpd_get_robotstxt +{ + my($c,$r) = @_; + $c->send_basic_header; + $c->print("Content-Type: text/plain"); + $c->send_crlf; + $c->send_crlf; + $c->print("User-Agent: * +Disallow: /private + +"); +} + +sub httpd_get_someplace +{ + my($c,$r) = @_; + $c->send_basic_header; + $c->print("Content-Type: text/plain"); + $c->send_crlf; + $c->send_crlf; + $c->print("Okidok\n"); +} + +$res = $ua->get( url("/someplace", $base) ); +#print $res->as_string; +print "not " unless $res->is_success; +print "ok 1\n"; + +$res = $ua->get( url("/private/place", $base) ); +#print $res->as_string; +print "not " unless $res->code == 403 + and $res->message =~ /robots.txt/; +print "ok 2\n"; + + +$res = $ua->get( url("/foo", $base) ); +#print $res->as_string; +print "not " unless $res->code == 404; # not found +print "ok 3\n"; + +# Let the robotua generate "Service unavailable/Retry After response"; +$ua->delay(1); +$ua->use_sleep(0); + +$res = $ua->get( url("/foo", $base) ); +#print $res->as_string; +print "not " unless $res->code == 503 # Unavailable + and $res->header("Retry-After"); +print "ok 4\n"; + +#---------------------------------------------------------------- +print "Terminating server...\n"; +sub httpd_get_quit +{ + my($c) = @_; + $c->send_error(503, "Bye, bye"); + exit; # terminate HTTP server +} + +$ua->delay(0); + +$res = $ua->get( url("/quit", $base) ); + +print "not " unless $res->code == 503 and $res->content =~ /Bye, bye/; +print "ok 5\n"; + +#--------------------------------------------------------------- +$ua->delay(1); + +# host_wait() should be around 60s now +print "not " unless abs($ua->host_wait($base->host_port) - 60) < 5; +print "ok 6\n"; + +# Number of visits to this place should be +print "not " unless $ua->no_visits($base->host_port) == 4; +print "ok 7\n"; + +# RobotUA used to have problem with mailto URLs. +$ENV{SENDMAIL} = "dummy"; +$res = $ua->get("mailto:gisle\@aas.no"); +#print $res->as_string; + +print "not " unless $res->code == 400 && $res->message eq "Library does not allow method GET for 'mailto:' URLs"; +print "ok 8\n"; diff --git a/t/robot/ua.t b/t/robot/ua.t new file mode 100644 index 0000000..3000e71 --- /dev/null +++ b/t/robot/ua.t @@ -0,0 +1,154 @@ +use FindBin qw($Bin); +if($^O eq "MacOS") { + print "1..0\n"; + exit(0); +} + +if (0 != system($^X, "$Bin/../../talk-to-ourself")) { + print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n"; + exit; +} + +delete $ENV{PERL_LWP_ENV_PROXY}; + +$| = 1; # autoflush +require IO::Socket; # make sure this work before we try to make a HTTP::Daemon + +# First we make ourself a daemon in another process +my $D = shift || ''; +if ($D eq 'daemon') { + + require HTTP::Daemon; + + my $d = new HTTP::Daemon Timeout => 10; + + print "Please to meet you at: <URL:", $d->url, ">\n"; + open(STDOUT, $^O eq 'MSWin32' ? ">nul" : $^O eq 'VMS' ? ">NL:" : ">/dev/null"); + + while ($c = $d->accept) { + $r = $c->get_request; + if ($r) { + my $p = ($r->uri->path_segments)[1]; + $p =~ s/\W//g; + my $func = lc("httpd_" . $r->method . "_$p"); + #print STDERR "Calling $func...\n"; + if (defined &$func) { + &$func($c, $r); + } + else { + $c->send_error(404); + } + } + $c = undef; # close connection + } + print STDERR "HTTP Server terminated\n"; + exit; +} +else { + use Config; + my $perl = $Config{'perlpath'}; + $perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i; + open(DAEMON , "$perl $0 daemon |") or die "Can't exec daemon: $!"; +} + +print "1..7\n"; + + +$greating = <DAEMON>; +$greating =~ /(<[^>]+>)/; + +require URI; +my $base = URI->new($1); +sub url { + my $u = URI->new(@_); + $u = $u->abs($_[1]) if @_ > 1; + $u->as_string; +} + +print "Will access HTTP server at $base\n"; + +require LWP::RobotUA; +require HTTP::Request; +$ua = new LWP::RobotUA 'lwp-spider/0.1', 'gisle@aas.no'; +$ua->delay(0.05); # rather quick robot + +#---------------------------------------------------------------- +sub httpd_get_robotstxt +{ + my($c,$r) = @_; + $c->send_basic_header; + $c->print("Content-Type: text/plain"); + $c->send_crlf; + $c->send_crlf; + $c->print("User-Agent: * +Disallow: /private + +"); +} + +sub httpd_get_someplace +{ + my($c,$r) = @_; + $c->send_basic_header; + $c->print("Content-Type: text/plain"); + $c->send_crlf; + $c->send_crlf; + $c->print("Okidok\n"); +} + +$req = new HTTP::Request GET => url("/someplace", $base); +$res = $ua->request($req); +#print $res->as_string; +print "not " unless $res->is_success; +print "ok 1\n"; + +$req = new HTTP::Request GET => url("/private/place", $base); +$res = $ua->request($req); +#print $res->as_string; +print "not " unless $res->code == 403 + and $res->message =~ /robots.txt/; +print "ok 2\n"; + +$req = new HTTP::Request GET => url("/foo", $base); +$res = $ua->request($req); +#print $res->as_string; +print "not " unless $res->code == 404; # not found +print "ok 3\n"; + +# Let the robotua generate "Service unavailable/Retry After response"; +$ua->delay(1); +$ua->use_sleep(0); +$req = new HTTP::Request GET => url("/foo", $base); +$res = $ua->request($req); +#print $res->as_string; +print "not " unless $res->code == 503 # Unavailable + and $res->header("Retry-After"); +print "ok 4\n"; + +#---------------------------------------------------------------- +print "Terminating server...\n"; +sub httpd_get_quit +{ + my($c) = @_; + $c->send_error(503, "Bye, bye"); + exit; # terminate HTTP server +} + +$ua->delay(0); +$req = new HTTP::Request GET => url("/quit", $base); +$res = $ua->request($req); + +print "not " unless $res->code == 503 and $res->content =~ /Bye, bye/; +print "ok 5\n"; + +#--------------------------------------------------------------- +$ua->delay(1); + +# host_wait() should be around 60s now +print "not " unless abs($ua->host_wait($base->host_port) - 60) < 5; +print "ok 6\n"; + +# Number of visits to this place should be +print "not " unless $ua->no_visits($base->host_port) == 4; +print "ok 7\n"; + diff --git a/talk-to-ourself b/talk-to-ourself new file mode 100644 index 0000000..6c0257a --- /dev/null +++ b/talk-to-ourself @@ -0,0 +1,49 @@ +#!perl -w + +# This program check if we are able to talk to ourself. Misconfigured +# systems that can't talk to their own 'hostname' was the most commonly +# reported libwww-failure. + +use strict; +require IO::Socket; + +if (@ARGV >= 2 && $ARGV[0] eq "--port") { + my $port = $ARGV[1]; + require Sys::Hostname; + my $host = Sys::Hostname::hostname(); + if (my $socket = IO::Socket::INET->new(PeerAddr => "$host:$port", Timeout => 5)) { + require IO::Select; + if (IO::Select->new($socket)->can_read(1)) { + my($n, $buf); + if ($n = sysread($socket, $buf, 512)) { + exit if $buf eq "Hi there!\n"; + die "Seems to be talking to the wrong server at $host:$port, got \"$buf\"\n"; + } + elsif (defined $n) { + die "Immediate EOF from server at $host:$port\n"; + } + else { + die "Can't read from server at $host:$port: $!"; + } + } + die "No response from server at $host:$port\n"; + } + die "Can't connect: $@\n"; +} + +# server code +my $socket = IO::Socket::INET->new(Listen => 1, Timeout => 5); +my $port = $socket->sockport; +open(CLIENT, qq("$^X" "$0" --port $port |)) || die "Can't run $^X $0: $!\n"; + +if (my $client = $socket->accept) { + print $client "Hi there!\n"; + close($client) || die "Can't close socket: $!"; +} +else { + warn "Test server timeout\n"; +} + +exit if close(CLIENT); +die "Can't wait for client: $!" if $!; +die "The can-we-talk-to-ourself test failed.\n"; |