diff options
109 files changed, 11853 insertions, 0 deletions
@@ -0,0 +1,1009 @@ +Revision history for URI + +2015-07-25 Karen Etheridge <ether@cpan.org> + + Release 1.69 + + Karen Etheridge: + - add $VERSIONs for all modules that lack them + + Olaf Alders: + - add missing documentation for URI::sftp + - Clarify use of query_param() method + + +2015-06-25 Karen Etheridge <ether@cpan.org> + + Release 1.68 + + Kent Fredric: + - Sort hash keys to make generated query predictable + + Slaven Rezic: + - Add new tests for path segments + + Brendan Byrd: + - Add sftp scheme + + +2015-02-24 Karen Etheridge <ether@cpan.org> + + Release 1.67 + + Karen Etheridge: + - properly skip author test for normal user installs + + +2015-02-24 Karen Etheridge <ether@cpan.org> + + Release 1.66 + + Adam Herzog: + - reorganize .pm files under lib/ (github #20) + + +2014-11-05 Karen Etheridge <ether@cpan.org> + + Release 1.65 + + Karen Etheridge: + - add a TO_JSON method, to assist JSON serialization + + +2014-07-13 Karen Etheridge <ether@cpan.org> + + Release 1.64 + + Eric Brine: + - better fix for RT#96941, that also works around utf8 bugs on older perls + + +2014-07-13 Karen Etheridge <ether@cpan.org> + + Release 1.63 + + Karen Etheridge: + - mark utf8-related test failures on older perls caused by recent string + parsing changes as TODO (RT#97177, RT#96941) + + +2014-07-12 Karen Etheridge <ether@cpan.org> + + Release 1.62 + + Karen Etheridge (2): + - use strict and warnings in all modules, tests and scripts + - remove all remaining uses of "use vars" + + Eric Brine: + - fixed new "\C is deprecated in regex" warning in 5.21.2 (RT#96941) + + +2014-07-01 Karen Etheridge <ether@cpan.org> + + Release 1.61 + + David Schmidt: + Fix test failure if local hostname is 'foo' [RT#75519] + + Gisle Aas (2): + New 'has_recognized_scheme' interface [RT#71204] + Interfaces that return a single value now return undef rather than an + empty list in list context + + Slaven Rezic: + Fix bad regex when parsing hostnames + + Piotr Roszatycki: + Preferentially use $ENV{TMPDIR} for temporary test files over /tmp + (fixes tests on Android) + + + +2012-03-25 Gisle Aas <gisle@ActiveState.com> + + Release 1.60 + + Gisle Aas (3): + Merge pull request #4 from hiratara/fix-repourl + Updated repository URL + Avoid failure if the local hostname is 'foo' [RT#75519] + + Masahiro Honma (1): + Fix the URL of the repository. + + Matt Lawrence (1): + Do not reverse the order of new parameters + + Peter Rabbitson (1): + Fix RT#59274 - courtesy of a stupid 5.8.[12] join bug + + + + +2011-08-15 Gisle Aas <gisle@ActiveState.com> + + Release 1.59 + + Make sure accessor methods don't return utf8::upgraded() bytes + for URLs initialized from Unicode strings. + + Version number increments. + + Documentation tweaks. + + + +2011-01-23 Gisle Aas <gisle@ActiveState.com> + + Release 1.58 + + This release reverts the patch in 1.57 that made query_form distingush + between empty and undef values. It broke stuff. [RT#62708] + + + +2011-01-22 Gisle Aas <gisle@ActiveState.com> + + Release 1.57 + + Mark Stosberg (8): + typo fix: s/do deal/to deal/ + best practice: s/foreach /for / + Whitespace: fix inconsistent use of tabs vs spaces + Code style: fix inconsistency with subroutine braces at the end of the line vs below it. + Modernize: s/use vars/our/ ... since we require 5.6 as a minimum version now + Whitespace: fix indentation so blocks are consistently indented + Add formal terms "Percent-encode" and "Percent-decode" to the NAME and description to match the RFC + Drop support for Perl < 5.8.1 Perl 5.8 was released almost 10 years ago. It's time. + + Gisle Aas (6): + Convert test to use Test::More + Adjust tests for query_form + Avoid "Use of uninitialized value"-noise from query_form + State test dependencies [RT#61538] + We also depend on ExtUtils::MakeMaker + State 5.8 dependency in the META.yml file + + Ville Skyttä (2): + Guess HTTPS and FTP from URI::Heuristic input with port but no scheme. + Try harder to guess scheme from hostnames besides just "$scheme.*" ones. + + John Miller (1): + Distingush between empty and undef values in query_form [RT#62708] + + + +2010-10-06 Gisle Aas <gisle@ActiveState.com> + + Release 1.56 + + Don't depend on DNS for the heuristics test + + + +2010-09-01 Gisle Aas <gisle@ActiveState.com> + + Release 1.55 + + Gisle Aas (2): + Treat ? as a reserved character in file: URIs + " is not a URI character [RT#56421] + + Torsten F<C3><B6>rtsch (1): + Avoid test failure unless defined $Config{useperlio} + + + +2010-03-31 Gisle Aas <gisle@ActiveState.com> + + Release 1.54 + + Alex Kapranoff (1): + Fix heuristic test fails on hosts in .su (or .uk) domains [RT#56135] + + + +2010-03-14 Gisle Aas <gisle@ActiveState.com> + + Release 1.53 + + Ville Skyttä (6): + Remove unneeded execute permissions. + Add $uri->secure() method. + Documentation and comment spelling fixes. + Fix heuristics when COUNTRY is set to "gb". + Use HTTP_ACCEPT_LANGUAGE, LC_ALL, and LANG in country heuristics. + POD linking improvements. + + Michael G. Schwern (2): + Rewrite the URI::Escape tests with Test::More + Update URI::Escape for RFC 3986 + + Gisle Aas (1): + Bump MIN_PERL_VERSION to 5.6.1 [RT#54078] + + Salvatore Bonaccorso (1): + Suppress wide caracters warnings in iri.t [RT#53737] + + + +2009-12-30 Gisle Aas <gisle@ActiveState.com> + + Release 1.52 + + Gisle Aas (7): + Encode::decode('UTF-8',...) with callback implemented in 2.39 + %%host%% hack can be removed when URI::_server::as_iri works + Don't croak on IRIs that can't be IDNA encoded + IDNA roundtrip test on wrong variable + Check behaviour when feeded URI constructor Latin-1 chars + Add some test examples from draft-duerst-iri-bis.txt + Need to recognize lower case hex digits as well + + + +2009-11-23 Gisle Aas <gisle@ActiveState.com> + + Release 1.51 + + Fixup a test that was broken on Windows + + + +2009-11-21 Gisle Aas <gisle@ActiveState.com> + + Release 1.50 + + The main news in this release is the initial attempt at providing + support to IRIs. URI objects now support the 'as_iri' and 'ihost' + methods. + + Gisle Aas (28): + Added more tests for setting IPv6 addresses using the host method + Document how the host methods deal with IPv6 addresses + A "test case" to start IDNA prototype from + Escape IDNA hostnames + Introduce the as_unicode method + Make as_unicode undo punycode for server URLs + An IRI class might be helpful (RFC 3987) + Must punycode each part of the domain name separately + Include initial private Punycode module + Get URI::_punycode working + punycode of plain ascii should not edit with "-" + Some more tests from RFC 3492 + Add private URI::_idna module based on encodings/idna.py + Start using URI::_idna for encoding of URIs + Avoid various use of undef warnings + Fix test affected by IDNA + Keep reference to IDNA::Punycode in the URI::_punycode docs + Ensure upgraded strings as input + Update manifest with the new idna/punycode files + Rename as_unicde to as_iri + draft-duerst-iri-bis-07: The proposed RFC 3987 update + Load Encode when its used + Rename host_unicode as ihost + Add basic iri test + Hack to make as_iri turn A-labels into U-labels + Make as_iri leave escapes not forming valid UTF-8 sequences + Merge branch 'iri' + Don't include RFCs in the cpan tarball + + Michael G. Schwern (3): + Fix != overloading to match == + Note that mailto does not contain a host() and this is not a bug. + Strip brackets off IPv6 hosts [RT#34309] + + + +2009-08-14 Gisle Aas <gisle@ActiveState.com> + + Release 1.40 + + Even stricter test for working DNS, 2nd try. + + + +2009-08-13 Gisle Aas <gisle@ActiveState.com> + + Release 1.39 + + Even stricter test for working DNS, hopefully this gets rid of the rest of + the heuristics.t failures. + + + +2009-05-27 Gisle Aas <gisle@ActiveState.com> + + Release 1.38 + + Ville Skyttä (3): + Spelling fixes. + + Tatsuhiko Miyagawa (1): + skip DNS test if wildcard domain catcher (e.g. OpenDNS) is there + + Gisle Aas (1): + Avoid "Insecure $ENV{PATH} while running with -T switch" error with perl-5.6. + + + +2008-06-16 Gisle Aas <gisle@ActiveState.com> + + Release 1.37 + + Gisle Aas (1): + Support ";" delimiter in $u->query_form + + Jan Dubois (1): + We get different test result when www.perl.com doesn't resolve. + + Kenichi Ishigaki (1): + URI::Heuristic didn't work for generic country code [RT#35156] + + + +2008-04-03 Gisle Aas <gisle@ActiveState.com> + + Release 1.36 + + <gerard@tty.nl>: Escape Unicode strings as UTF-8. + + Bjoern Hoehrmann <derhoermi@gmx.net>: fixed URL encoded data: URLs + + GAAS: uri_escape_utf8() now exported by default as documented. + + BDFOY: Test fails with Business::ISBN installed [RT#33220] + + JDHEDDEN: lc(undef) reports warning in blead [RT#32742] + + GEOFFR: add some tests for gopher URIs [RT#29211] + + + +2004-11-05 Gisle Aas <gisle@ActiveState.com> + + Release 1.35 + + Documentation update. + + Simplified uri_escape_utf8 implementation. No need to load the + Encode module. Contributed by Alexey Tourbin. + + Work around bug in perl-5.6.0 that made t/query.t fail. + + + +2004-10-05 Gisle Aas <gisle@ActiveState.com> + + Release 1.34 + + URI->canonical will now always unescape any escaped unreserved + chars. Previously this only happened for the http and https scheme. + Patch contributed by Eric Promislow <ericp@ActiveState.com>. + + + +2004-09-19 Gisle Aas <gisle@ActiveState.com> + + Release 1.33 + + URI::file->canonical will now try to change the 'authority' + to the default one. + + Fix heuristic test. Apparently www.perl.co.uk is no more. + + + +2004-09-07 Gisle Aas <gisle@ActiveState.com> + + Release 1.32 + + Introduce $URI::file::DEFAULT_AUTHORITY which control what + authority string to use for absolute file URIs. Its value + default to "" which produce file URIs that better interoperates + with other implementations. The old mapping behaviour can be + requested by setting this variable to undef. + + + +2004-06-08 Gisle Aas <gisle@ActiveState.com> + + Release 1.31 + + Added uri_escape_utf8() function to URI::Escape module. + + Fixed abs/rel behaviour for sip: URIs. Fixed by + Ville Skyttä <ville.skytta@iki.fi>. + + Avoid croaking on code like $u->query_form(a => { foo => 1 }). + It will still not really do anything useful. + + + +2004-01-14 Gisle Aas <gisle@ActiveState.com> + + Release 1.30 + + Documentation fixes by Paul Croome <Paul.Croome@softwareag.com>. + + + +2004-01-02 Gisle Aas <gisle@ActiveState.com> + + Release 1.29 + + Added support for the ldapi: and ldaps: schemes. + The ldaps: implementation was contributed by Graham Barr. + + Added support for mms: scheme. Contributed by + Dan Sully <daniel@electricrain.com>. + + + +2003-11-30 Gisle Aas <gisle@ActiveState.com> + + Release 1.28 + + The query_param_delete() method was not able to delete + the last parameter from a form. Similar problem existing + when deleting via query_param(). Patch by <awk@awks.org>. + + The query_form() method now allow an array or hash + reference to be passed to set the value. This makes it + possible to set the value to an empty form, something that + the old API did not allow. + + The query_keywords() method now allow an array reference + to be passed to set the value. + + + +2003-10-06 Gisle Aas <gisle@ActiveState.com> + + Release 1.27 + + The URI module is now less strict about the values accepted + for gopher_type attribute of gopher:-URLs. Patch suggested + by the Net::Gopher author; William G. Davis. + + + +2003-10-03 Gisle Aas <gisle@ActiveState.com> + + Release 1.26 + + Help Storable deal with URI objects. Patch contributed + by <talby@trap.mtview.ca.us>. + + Fix failure under OS/2. Patch contributed by Ilya Zakharevich. + + + +2003-08-18 Gisle Aas <gisle@ActiveState.com> + + Release 1.25 + + Allow literal '@' in userinfo. If there are multiple '@' chars + in the 'authority' component use the last (instead of first) as + the 'userinfo' delimiter. + + Make URI->query_form escape '[' and ']'. These chars where added + to the reserved set in RFC 2732. This also matches MSIE behaviour. + + Silence warning from 'sip' support class. + + + +2003-07-24 Gisle Aas <gisle@ActiveState.com> + + Release 1.24 + + Relative URIs that start with the query string directly (i.e. "?q") + are now absolutized as specified in rfc2396bis. See: + http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query + + Added URI::Split module. It's a lightweight module that can be + used to parse and compose URI string to/from its component parts. + + The rel() method will now work from canonical URIs. That allow it + to extract a relative URI in more cases. + + + +2003-01-01 Gisle Aas <gisle@ActiveState.com> + + Release 1.23 + + Support for tn3270 URIs. + + Use anchored DNS lookups in URI::Heuristic as suggested + by Malcolm Weir <malc@gelt.org>. + + Delay calculation of MY_COUNTRY() in URI::Heuristic. + Patch by Ed Avis <ed@membled.com>. + + Make test suite work for UNC paths. + Patch by Warren Jones <wjones@fluke.com>. + + + +2002-09-02 Gisle Aas <gisle@ActiveState.com> + + Release 1.22 + + Added URI::QueryParam module. It contains some + extra methods to manipulate the query form + key/value pairs. + + Added support for the sip: and sips: URI scheme. + Contributed by Ryan Kereliuk <ryker@ryker.org>. + + + +2002-08-04 Gisle Aas <gisle@ActiveState.com> + + Release 1.21 + + Restore perl-5.004 and perl-5.005 compatibility. + + + +2002-07-18 Gisle Aas <gisle@ActiveState.com> + + Release 1.20 + + Direct support for some new schemes urn:, urn:isbn:, + urn:oid:, rtsp:, and rtspu:. The rtsp support was + contributed by Matt Selsky <selsky@columbia.edu>. + + Documentation fix for $URI::ABS_REMOTE_LEADING_DOTS. + CPAN-RT-Bug #1224. + + The host for URI::file was not unescaped. + Patch by Ville Skyttä <ville.skytta@iki.fi>. + + + +2002-05-09 Gisle Aas <gisle@ActiveState.com> + + Release 1.19 + + URI::Heuristic will guess better on strings + like "123.3.3.3:8080/foo". It used to think that + the numbers before ":" was a scheme. + + URI::WithBase will not keep the full history of + any base URI's base URI etc. This used to make + these objects grow into to monsters for some + web spiders. + + URI::URL->new("foo", "bar")->base used to return + a "URI" object. Now an URI::URL object is returned + instead. + + Deal properly with file:///-URIs. + + + +2001-12-30 Gisle Aas <gisle@ActiveState.com> + + Release 1.18 + + Added support for ssh: URIs. + Contributed by Jean-Philippe Bouchard <jeanphil@sitepak.com> + + URI::Escape: Make sure cache is not set when the RE + wouldn't compile. Fix suggested by <me-01@ton.iguana.be>. + Applied patch as suggested by Randal L. Schwartz. + + Don't try to come up with the e-mail address of the user as + the anonymous password. + Patch by Eduardo Pérez <eperez@dei.inf.uc3m.es>. + + + +2001-09-14 Gisle Aas <gisle@ActiveState.com> + + Release 1.17 + + Fixed unescape of %30 in $http_uri->canonical. + + Fixed test failure for t/heuristic.t on cygwin. + + Fixed warning noise from t/old-base.t on bleadperl. + Perl now warns for pack("c*", $i) when $i > 127. + + + +2001-08-27 Gisle Aas <gisle@ActiveState.com> + + Release 1.16 + + URI::Escape::uri_escape default has changed. Reserved + characters are now escaped when no second argument is + provided. + + The perl5.004 backwards compatibility patching taking place + in the Makefile.PL should now work for MacPerl. + Patch by KIMURA Takeshi <kim@ga2.so-net.ne.jp>. + + URI::WithBase now overrides the can() method and delegate it to + the URI member. This also affects the URI::URL behaviour. + Patch by Sean M. Burke <sburke@cpan.org>. + + + +2001-07-19 Gisle Aas <gisle@ActiveState.com> + + Release 1.15 + + [This release was made just to document the changes that went + into the (unreleased) URI-1.13 but never made it into this + change-log. There is no functional difference between the 1.14 + and 1.15 release.] + + + +2001-07-18 Gisle Aas <gisle@ActiveState.com> + + Release 1.14 + + The module failed on perl5.004 because \z is not supported + in regexps. The Makefile.PL will now try to patch the module + to be compatible. + + + +2001-05-15 Gisle Aas <gisle@ActiveState.com> + + Release 1.13 (never made it to CPAN) + + URI.pm now conforms to RFC 2732 which specify how literal IPv6 + addresses are to be included in URLs. + + URI/Escape now allows "/" in the $unsafe pattern argument. + + + +2001-04-23 Gisle Aas <gisle@ActiveState.com> + + Release 1.12 + + URI->new($u, $scheme) does no longer fail if given a badly + formatted scheme string. + + URI::WithBase's clone and base method was basically just + broken. This also affected the URI::URL subclass. + The clone() method did not copy the base, and updating + the base with the base method always set it to "1". + + + +2001-02-27 Gisle Aas <gisle@ActiveState.com> + + Release 1.11 + + The t/heuristic.t test relied on the fact that 'www.perl.no' + was not registered in DNS. This is no longer true. + The penguins at Bouvet Island will hopefully be ignorant + of Perl forever. + + + +2001-01-10 Gisle Aas <gisle@ActiveState.com> + + Release 1.10 + + The $u->query_form method will now escape spaces in + form keys or values as '+' (instead of '%20'). This also + affect the $mailto_uri->header() method. This is actually + the wrong thing to do, but this practise is now even + documented in official places like + http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 + so we might as well follow the stream. + + URI::Heuristic did not work for domain-names with dashes '-' + in them. Fixed. + + Documented that $uri->xxx($1) might not work. + + + +2000-08-16 Gisle Aas <gisle@ActiveState.com> + + Release 1.09 + + uri_unescape() did not work when given multiple strings + to decode. Patch by Nicholas Clark <nick@ccl4.org>. + + + +2000-08-02 Gisle Aas <gisle@ActiveState.com> + + Release 1.08 + + ldap URIs now support _scope() and _filter() methods that + don't have default values. Suggested by Graham Barr. + + Incorporated old rejected MSWin32 patch to t/old-base.t. + Hope it works. + + + +2000-06-13 Gisle Aas <gisle@ActiveState.com> + + Release 1.07 + + URI::WithBase (and URI::URL) now support $u->new_abs + constructor. + + URI::WithBase->new("foo", "URI::URL") bug fixed. + + + +2000-04-09 Gisle Aas <gisle@aas.no> + + Release 1.06 + + Clean test/install on VMS. + Patch by Charles Lane <lane@DUPHY4.Physics.Drexel.Edu> + + + +2000-02-14 Gisle Aas <gisle@aas.no> + + Release 1.05 + + QNX file support by Norton Allen <allen@huarp.harvard.edu>. + + Support for rsync:-URI by Dave Beckett <D.J.Beckett@ukc.ac.uk> + + + +1999-08-03 Gisle Aas <gisle@aas.no> + + Release 1.04 + + Avoid testing for defined(@ISA) and defined(%class::). Patch + by Nathan Torkington <gnat@frii.com>. + + $uri->abs() did wrong when the fragment contained a "?" + character. + + Typo in URI::ldap spotted by Graham Barr. + + + +1999-06-24 Gisle Aas <gisle@aas.no> + + Release 1.03 + + Escape all reserved query characters in the individual components + of $uri->query_form and $uri->query_keywords. + + Make compatibility URI::URL->new("mailto:gisle@aas.no")->netloc + work again. + + + +1999-03-26 Gisle Aas <gisle@aas.no> + + Release 1.02 + + Added URI::ldap. Contributed by Graham Barr <gbarr@pobox.com>. + + Documentation update. + + + +1999-03-20 Gisle Aas <gisle@aas.no> + + Release 1.01 + + MacOS patches from Paul J. Schinder <schinder@leprss.gsfc.nasa.gov> + + Documentation patch from Michael A. Chase <mchase@ix.netcom.com> + + + +1998-11-19 Gisle Aas <aas@sn.no> + + Release 1.00 + + Added new URI->new_abs method + + Replaced a few die calls with croak. + + + +1998-10-12 Gisle Aas <aas@sn.no> + + Release 0.90_02 + + Implemented new $uri->host_port method. + + $uri->epath and $uri->equery aliases to make URI::URL + compatibility easier. + + + +1998-09-23 Gisle Aas <aas@sn.no> + + Release 0.90_01 + + New README + + Makefile.PL list MIME::Base64 as PREREQ_PM + + Original $scheme argument not passed to _init() method. + + Automatically add scheme to empty URIs where the scheme + is required: URI->new("", "data") + + Documentation update. + + New URI::URL::strict implementation. + + + +1998-09-22 Gisle Aas <aas@sn.no> + + Release 0.09_02 + + New internal URI::file::* interface. Implemented 8.3 mapping + for "dos". + + Got rid of $URI::STRICT and $URI::DEFAULT_SCHEME + + More documentation. + + + +1998-09-13 Gisle Aas <aas@sn.no> + + Release 0.09_01 + + Use version number with underscore to avoid that the CPAN + indexer hides the URI::URL from libwww-perl that contains + all the documentation. + + Started to document the new modules. + + URI::file->new() escape fix which allow Mac file names like + ::.. to be treated as they should (I think). + + + +1998-09-12 Gisle Aas <aas@sn.no> + + Release 0.09 + + Included URI::Escape and URI::Heuristic from LWP. URI::Escape + updated with new default set of characters to escape (according + to RFC 2396) and a faster uri_unescape() function. URI::Heuristic + updated with a new function that returns an URI object. + + First argument to URI->new is always treated as a string now. + + URI->new("", URI::WithBase("foo:")) now works. It returns an + URI::WithBase object. + + Included Roy T. Fielding's URI parsing/abs tests from + <http://www.ics.uci.edu/~fielding/url/>. We did in fact agree + with RFC 2396 on all tests. + + Allow authority "A|" in Win32 file:-URIs to denote A:. Treat + escaped chars. + + + +1998-09-10 Gisle Aas <aas@sn.no> + + Release 0.08 + + Implemented transformations between various file: URIs and + actual file names. New URI::file methods: + + new + new_abs + cwd + file + dir + + + +1998-09-09 Gisle Aas <aas@sn.no> + + Release 0.07 + + Implemented rlogin, telnet and file URLs. + + Implemented URI::WithBase + + Implemented URI::URL emulator (ported old URI::URL test suite) + + Can now use schemes with "-", "+" or "." characters in them. + + $u->scheme will downcase. $u->_scheme will keep it as it is. + + Configuration variables for $u->abs + + $u->query_form and $u->query_keyword is more careful about escaping + "+" and "=". + + $u->host unescaped + + $u->_port if you want to bypass $u->default_port + + Can handle news message-ids with embedded "/" now + + + +1998-09-08 Gisle Aas <aas@sn.no> + + Release 0.06 + + Implemented gopher URLs + + Implemented ftp URLs + + Second ctor argument can be a plain scheme name. If it is an + object, then we use the class of the object as implementor. + + Protect literal % in various places by escaping + + Path segments with parameters are not arrays of class URI::_segment, + which overloads the stringify operator. + + URI::http->canonical will now unescape unreserved characters. + + + +1998-09-08 Gisle Aas <aas@sn.no> + + Release 0.05 + + Implemented news URLs (together with snews/nntp) + + Implemented pop URLs (RFC 2384) + + Can now use '==' to compare if two URI objects are the same or not. + + $u->opaque_part renamed as $u->opaque + + Better canonicalization + + Faster $u->abs (especially for URI that already are absolute) + + $u->query_form will keep more chars unescaped + + + +1998-09-06 Gisle Aas <aas@sn.no> + + Release 0.04 + + Implemented mailto:-URLs (specified in RFC 2368) + + Moved query() methods to internal URI::_query mixin class. + + Escape stuff in the media_type field of data:-URLs. + + + +1998-09-06 Gisle Aas <aas@sn.no> + + Release 0.03 based on simplified scalar object. + + + +1998-09-02 Gisle Aas <aas@sn.no> + + Release 0.02 based on perl5.005 and fields.pm + + + +1998-04-10 Gisle Aas <aas@sn.no> + + Release 0.01 @@ -0,0 +1,379 @@ +This software is copyright (c) 1998 by Gisle Aas. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 1998 by Gisle Aas. + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 1998 by Gisle Aas. + +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +The End + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..a7ff67a --- /dev/null +++ b/MANIFEST @@ -0,0 +1,109 @@ +Changes +lib/URI.pm +lib/URI/_foreign.pm +lib/URI/_generic.pm +lib/URI/_idna.pm +lib/URI/_ldap.pm +lib/URI/_login.pm +lib/URI/_punycode.pm +lib/URI/_query.pm +lib/URI/_segment.pm +lib/URI/_server.pm +lib/URI/_userpass.pm +lib/URI/data.pm +lib/URI/Escape.pm +lib/URI/file.pm +lib/URI/file/Base.pm +lib/URI/file/FAT.pm +lib/URI/file/Mac.pm +lib/URI/file/OS2.pm +lib/URI/file/QNX.pm +lib/URI/file/Unix.pm +lib/URI/file/Win32.pm +lib/URI/ftp.pm +lib/URI/gopher.pm +lib/URI/Heuristic.pm +lib/URI/http.pm +lib/URI/https.pm +lib/URI/IRI.pm +lib/URI/ldap.pm +lib/URI/ldapi.pm +lib/URI/ldaps.pm +lib/URI/mailto.pm +lib/URI/mms.pm +lib/URI/news.pm +lib/URI/nntp.pm +lib/URI/pop.pm +lib/URI/QueryParam.pm +lib/URI/rlogin.pm +lib/URI/rsync.pm +lib/URI/rtsp.pm +lib/URI/rtspu.pm +lib/URI/sftp.pm +lib/URI/sip.pm +lib/URI/sips.pm +lib/URI/snews.pm +lib/URI/Split.pm +lib/URI/ssh.pm +lib/URI/telnet.pm +lib/URI/tn3270.pm +lib/URI/URL.pm +lib/URI/urn.pm +lib/URI/urn/isbn.pm +lib/URI/urn/oid.pm +lib/URI/WithBase.pm +LICENSE +Makefile.PL +MANIFEST +README +t/abs.t +t/clone.t +t/cwd.t +t/data.t +t/distmanifest.t +t/escape-char.t +t/escape.t +t/file.t +t/ftp.t +t/generic.t +t/gopher.t +t/heuristic.t +t/http.t +t/idna.t +t/iri.t +t/ldap.t +t/mailto.t +t/mix.t +t/mms.t +t/news.t +t/num_eq.t +t/old-absconf.t +t/old-base.t +t/old-file.t +t/old-relbase.t +t/path-segments.t +t/pop.t +t/punycode.t +t/query-param.t +t/query.t +t/rel.t +t/rfc2732.t +t/roy-test.t +t/roytest1.html +t/roytest2.html +t/roytest3.html +t/roytest4.html +t/roytest5.html +t/rsync.t +t/rtsp.t +t/sip.t +t/sort-hash-query-form.t +t/split.t +t/storable-test.pl +t/storable.t +t/urn-isbn.t +t/urn-oid.t +t/utf8.t +uri-test +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..6635cb6 --- /dev/null +++ b/META.json @@ -0,0 +1,91 @@ +{ + "abstract" : "Uniform Resource Identifiers (absolute and relative)", + "author" : [ + "Gisle Aas <gisle@activestate.com>" + ], + "dynamic_config" : 0, + "generated_by" : "ExtUtils::MakeMaker version 7.0525, CPAN::Meta::Converter version 2.150005", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "URI", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : {} + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Exporter" : "5.57", + "MIME::Base64" : "2", + "Scalar::Util" : "0", + "parent" : "0", + "perl" : "5.008001", + "utf8" : "0" + } + }, + "test" : { + "requires" : { + "File::Temp" : "0", + "Test::More" : "0.96" + } + } + }, + "release_status" : "stable", + "resources" : { + "bugtracker" : { + "mailto" : "bug-URI@rt.cpan.org", + "web" : "https://rt.cpan.org/Public/Dist/Display.html?Name=URI" + }, + "repository" : { + "type" : "git", + "url" : "https://github.com/libwww-perl/uri.git", + "web" : "https://github.com/libwww-perl/uri" + }, + "x_IRC" : "irc://irc.perl.org/#lwp", + "x_MailingList" : "mailto:libwww@perl.org" + }, + "version" : "1.69", + "x_authority" : "cpan:GAAS", + "x_contributors" : [ + "Gisle Aas <gisle@aas.no>", + "Karen Etheridge <ether@cpan.org>", + "Ville Skyttä <ville.skytta@iki.fi>", + "Mark Stosberg <mark@stosberg.com>", + "Michael G. Schwern <schwern@pobox.com>", + "Olaf Alders <olaf@wundersolutions.com>", + "Slaven Rezic <slaven@rezic.de>", + "Matt Lawrence <matthewlawrence@venda.com>", + "Peter Rabbitson <ribasushi@cpan.org>", + "Piotr Roszatycki <piotr.roszatycki@gmail.com>", + "Salvatore Bonaccorso <carnil@launchpad.net>", + "Tatsuhiko Miyagawa <miyagawa@bulknews.net>", + "Torsten Förtsch <torsten.foertsch@gmx.net>", + "Adam Herzog <adam@adamherzog.com>", + "gerard <gerard@tty.nl>", + "Alex Kapranoff <kapranoff@gmail.com>", + "Brendan Byrd <Perl@ResonatorSoft.org>", + "David Schmidt <davewood@gmx.at>", + "Jan Dubois <jand@activestate.com>", + "John Miller <john@rimmkaufman.com>", + "Kenichi Ishigaki <ishigaki@cpan.org>", + "Kent Fredric <kentfredric@gmail.com>", + "Masahiro Honma <hiratara@cpan.org>" + ], + "x_serialization_backend" : "JSON::PP version 2.27300" +} + diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..00d9229 --- /dev/null +++ b/META.yml @@ -0,0 +1,60 @@ +--- +abstract: 'Uniform Resource Identifiers (absolute and relative)' +author: + - 'Gisle Aas <gisle@activestate.com>' +build_requires: + File::Temp: '0' + Test::More: '0.96' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'ExtUtils::MakeMaker version 7.0525, CPAN::Meta::Converter version 2.150005' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: URI +no_index: + directory: + - t + - inc +requires: + Exporter: '5.57' + MIME::Base64: '2' + Scalar::Util: '0' + parent: '0' + perl: '5.008001' + utf8: '0' +resources: + IRC: irc://irc.perl.org/#lwp + MailingList: mailto:libwww@perl.org + bugtracker: https://rt.cpan.org/Public/Dist/Display.html?Name=URI + repository: https://github.com/libwww-perl/uri.git +version: '1.69' +x_authority: cpan:GAAS +x_contributors: + - 'Gisle Aas <gisle@aas.no>' + - 'Karen Etheridge <ether@cpan.org>' + - 'Ville Skyttä <ville.skytta@iki.fi>' + - 'Mark Stosberg <mark@stosberg.com>' + - 'Michael G. Schwern <schwern@pobox.com>' + - 'Olaf Alders <olaf@wundersolutions.com>' + - 'Slaven Rezic <slaven@rezic.de>' + - 'Matt Lawrence <matthewlawrence@venda.com>' + - 'Peter Rabbitson <ribasushi@cpan.org>' + - 'Piotr Roszatycki <piotr.roszatycki@gmail.com>' + - 'Salvatore Bonaccorso <carnil@launchpad.net>' + - 'Tatsuhiko Miyagawa <miyagawa@bulknews.net>' + - 'Torsten Förtsch <torsten.foertsch@gmx.net>' + - 'Adam Herzog <adam@adamherzog.com>' + - 'gerard <gerard@tty.nl>' + - 'Alex Kapranoff <kapranoff@gmail.com>' + - 'Brendan Byrd <Perl@ResonatorSoft.org>' + - 'David Schmidt <davewood@gmx.at>' + - 'Jan Dubois <jand@activestate.com>' + - 'John Miller <john@rimmkaufman.com>' + - 'Kenichi Ishigaki <ishigaki@cpan.org>' + - 'Kent Fredric <kentfredric@gmail.com>' + - 'Masahiro Honma <hiratara@cpan.org>' +x_serialization_backend: 'CPAN::Meta::YAML version 0.016' + diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..a8729d4 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,138 @@ +use strict; +use warnings; +require 5.008001; +use utf8; +use ExtUtils::MakeMaker; + +my $developer = -f '.gitignore'; +ExtUtils::MakeMaker->VERSION(6.98) if $developer; + +my %WriteMakefileArgs = ( + NAME => 'URI', + VERSION_FROM => 'lib/URI.pm', + ABSTRACT_FROM => 'lib/URI.pm', + AUTHOR => 'Gisle Aas <gisle@activestate.com>', + LICENSE => 'perl_5', + + META_MERGE => { + 'meta-spec' => { version => 2 }, + dynamic_config => 0, + resources => { + repository => { + url => 'https://github.com/libwww-perl/uri.git', + web => 'https://github.com/libwww-perl/uri', + type => 'git', + }, + bugtracker => { + mailto => 'bug-URI@rt.cpan.org', + web => 'https://rt.cpan.org/Public/Dist/Display.html?Name=URI', + }, + x_MailingList => 'mailto:libwww@perl.org', + x_IRC => 'irc://irc.perl.org/#lwp', + }, + x_authority => 'cpan:GAAS', + x_contributors => [ # manually added, from git shortlog -e -s -n + 'Gisle Aas <gisle@aas.no>', + 'Karen Etheridge <ether@cpan.org>', + 'Ville Skyttä <ville.skytta@iki.fi>', + 'Mark Stosberg <mark@stosberg.com>', + 'Michael G. Schwern <schwern@pobox.com>', + 'Olaf Alders <olaf@wundersolutions.com>', + 'Slaven Rezic <slaven@rezic.de>', + 'Matt Lawrence <matthewlawrence@venda.com>', + 'Peter Rabbitson <ribasushi@cpan.org>', + 'Piotr Roszatycki <piotr.roszatycki@gmail.com>', + 'Salvatore Bonaccorso <carnil@launchpad.net>', + 'Tatsuhiko Miyagawa <miyagawa@bulknews.net>', + 'Torsten Förtsch <torsten.foertsch@gmx.net>', + 'Adam Herzog <adam@adamherzog.com>', + 'gerard <gerard@tty.nl>', + 'Alex Kapranoff <kapranoff@gmail.com>', + 'Brendan Byrd <Perl@ResonatorSoft.org>', + 'David Schmidt <davewood@gmx.at>', + 'Jan Dubois <jand@activestate.com>', + 'John Miller <john@rimmkaufman.com>', + 'Kenichi Ishigaki <ishigaki@cpan.org>', + 'Kent Fredric <kentfredric@gmail.com>', + 'Masahiro Honma <hiratara@cpan.org>', + ], + }, + + META_ADD => { + prereqs => { + configure => { + requires => { + 'ExtUtils::MakeMaker' => '0', + }, + }, + runtime => { + requires => { + 'MIME::Base64' => '2', + 'parent' => '0', + 'Exporter' => '5.57', + 'utf8' => '0', + 'Scalar::Util' => '0', + 'perl' => '5.008001', + }, + recommends => { + 'Business::ISBN' => '0', + }, + }, + test => { + requires => { + 'Test::More' => '0.96', + 'File::Temp' => '0', + }, + }, + }, + }, +); + +my $eumm_version = eval $ExtUtils::MakeMaker::VERSION; + +for (qw(configure build test runtime)) { + my $key = $_ eq 'runtime' ? 'PREREQ_PM' : uc $_.'_REQUIRES'; + next unless exists $WriteMakefileArgs{META_ADD}{prereqs}{$_} + or exists $WriteMakefileArgs{$key}; + my $r = $WriteMakefileArgs{$key} = { + %{$WriteMakefileArgs{META_ADD}{prereqs}{$_}{requires} || {}}, + %{delete $WriteMakefileArgs{$key} || {}}, + }; + defined $r->{$_} or delete $r->{$_} for keys %$r; +} + +# dynamic prereqs get added here. + +$WriteMakefileArgs{MIN_PERL_VERSION} = delete $WriteMakefileArgs{PREREQ_PM}{perl} || 0; + +die 'attention developer: you need to do a sane meta merge here!' + if keys %{$WriteMakefileArgs{BUILD_REQUIRES}}; + +$WriteMakefileArgs{BUILD_REQUIRES} = { + %{$WriteMakefileArgs{BUILD_REQUIRES} || {}}, + %{delete $WriteMakefileArgs{TEST_REQUIRES}} +} if $eumm_version < 6.63_03; + +$WriteMakefileArgs{PREREQ_PM} = { + %{$WriteMakefileArgs{PREREQ_PM}}, + %{delete $WriteMakefileArgs{BUILD_REQUIRES}} +} if $eumm_version < 6.55_01; + +delete $WriteMakefileArgs{CONFIGURE_REQUIRES} + if $eumm_version < 6.51_03; + +delete $WriteMakefileArgs{MIN_PERL_VERSION} + if $eumm_version < 6.48; + +delete @WriteMakefileArgs{qw(META_ADD META_MERGE)} + if $eumm_version < 6.46; + +delete $WriteMakefileArgs{LICENSE} + if $eumm_version < 6.31; + +WriteMakefile(%WriteMakefileArgs); + +# pod2text is in https://metacpan.org/release/podlators +system("pod2text $WriteMakefileArgs{VERSION_FROM} > README") + if $developer + and (not -e 'README' or (stat('README'))[9] < (stat($WriteMakefileArgs{VERSION_FROM}))[9]); @@ -0,0 +1,667 @@ +NAME + URI - Uniform Resource Identifiers (absolute and relative) + +SYNOPSIS + $u1 = URI->new("http://www.perl.com"); + $u2 = URI->new("foo", "http"); + $u3 = $u2->abs($u1); + $u4 = $u3->clone; + $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical; + + $str = $u->as_string; + $str = "$u"; + + $scheme = $u->scheme; + $opaque = $u->opaque; + $path = $u->path; + $frag = $u->fragment; + + $u->scheme("ftp"); + $u->host("ftp.perl.com"); + $u->path("cpan/"); + +DESCRIPTION + This module implements the "URI" class. Objects of this class represent + "Uniform Resource Identifier references" as specified in RFC 2396 (and + updated by RFC 2732). + + A Uniform Resource Identifier is a compact string of characters that + identifies an abstract or physical resource. A Uniform Resource + Identifier can be further classified as either a Uniform Resource + Locator (URL) or a Uniform Resource Name (URN). The distinction between + URL and URN does not matter to the "URI" class interface. A + "URI-reference" is a URI that may have additional information attached + in the form of a fragment identifier. + + An absolute URI reference consists of three parts: a *scheme*, a + *scheme-specific part* and a *fragment* identifier. A subset of URI + references share a common syntax for hierarchical namespaces. For these, + the scheme-specific part is further broken down into *authority*, *path* + and *query* components. These URIs can also take the form of relative + URI references, where the scheme (and usually also the authority) + component is missing, but implied by the context of the URI reference. + The three forms of URI reference syntax are summarized as follows: + + <scheme>:<scheme-specific-part>#<fragment> + <scheme>://<authority><path>?<query>#<fragment> + <path>?<query>#<fragment> + + The components into which a URI reference can be divided depend on the + *scheme*. The "URI" class provides methods to get and set the individual + components. The methods available for a specific "URI" object depend on + the scheme. + +CONSTRUCTORS + The following methods construct new "URI" objects: + + $uri = URI->new( $str ) + $uri = URI->new( $str, $scheme ) + Constructs a new URI object. The string representation of a URI is + given as argument, together with an optional scheme specification. + Common URI wrappers like "" and <>, as well as leading and trailing + white space, are automatically removed from the $str argument before + it is processed further. + + The constructor determines the scheme, maps this to an appropriate + URI subclass, constructs a new object of that class and returns it. + + If the scheme isn't one of those that URI recognizes, you still get + an URI object back that you can access the generic methods on. The + "$uri->has_recognized_scheme" method can be used to test for this. + + The $scheme argument is only used when $str is a relative URI. It + can be either a simple string that denotes the scheme, a string + containing an absolute URI reference, or an absolute "URI" object. + If no $scheme is specified for a relative URI $str, then $str is + simply treated as a generic URI (no scheme-specific methods + available). + + The set of characters available for building URI references is + restricted (see URI::Escape). Characters outside this set are + automatically escaped by the URI constructor. + + $uri = URI->new_abs( $str, $base_uri ) + Constructs a new absolute URI object. The $str argument can denote a + relative or absolute URI. If relative, then it is absolutized using + $base_uri as base. The $base_uri must be an absolute URI. + + $uri = URI::file->new( $filename ) + $uri = URI::file->new( $filename, $os ) + Constructs a new *file* URI from a file name. See URI::file. + + $uri = URI::file->new_abs( $filename ) + $uri = URI::file->new_abs( $filename, $os ) + Constructs a new absolute *file* URI from a file name. See + URI::file. + + $uri = URI::file->cwd + Returns the current working directory as a *file* URI. See + URI::file. + + $uri->clone + Returns a copy of the $uri. + +COMMON METHODS + The methods described in this section are available for all "URI" + objects. + + Methods that give access to components of a URI always return the old + value of the component. The value returned is "undef" if the component + was not present. There is generally a difference between a component + that is empty (represented as "") and a component that is missing + (represented as "undef"). If an accessor method is given an argument, it + updates the corresponding component in addition to returning the old + value of the component. Passing an undefined argument removes the + component (if possible). The description of each accessor method + indicates whether the component is passed as an escaped + (percent-encoded) or an unescaped string. A component that can be + further divided into sub-parts are usually passed escaped, as unescaping + might change its semantics. + + The common methods available for all URI are: + + $uri->scheme + $uri->scheme( $new_scheme ) + Sets and returns the scheme part of the $uri. If the $uri is + relative, then $uri->scheme returns "undef". If called with an + argument, it updates the scheme of $uri, possibly changing the class + of $uri, and returns the old scheme value. The method croaks if the + new scheme name is illegal; a scheme name must begin with a letter + and must consist of only US-ASCII letters, numbers, and a few + special marks: ".", "+", "-". This restriction effectively means + that the scheme must be passed unescaped. Passing an undefined + argument to the scheme method makes the URI relative (if possible). + + Letter case does not matter for scheme names. The string returned by + $uri->scheme is always lowercase. If you want the scheme just as it + was written in the URI in its original case, you can use the + $uri->_scheme method instead. + + $uri->has_recognized_scheme + Returns TRUE if the URI scheme is one that URI recognizes. + + It will also be TRUE for relative URLs where a recognized scheme was + provided to the constructor, even if "$uri->scheme" returns "undef" + for these. + + $uri->opaque + $uri->opaque( $new_opaque ) + Sets and returns the scheme-specific part of the $uri (everything + between the scheme and the fragment) as an escaped string. + + $uri->path + $uri->path( $new_path ) + Sets and returns the same value as $uri->opaque unless the URI + supports the generic syntax for hierarchical namespaces. In that + case the generic method is overridden to set and return the part of + the URI between the *host name* and the *fragment*. + + $uri->fragment + $uri->fragment( $new_frag ) + Returns the fragment identifier of a URI reference as an escaped + string. + + $uri->as_string + Returns a URI object to a plain ASCII string. URI objects are also + converted to plain strings automatically by overloading. This means + that $uri objects can be used as plain strings in most Perl + constructs. + + $uri->as_iri + Returns a Unicode string representing the URI. Escaped UTF-8 + sequences representing non-ASCII characters are turned into their + corresponding Unicode code point. + + $uri->canonical + Returns a normalized version of the URI. The rules for normalization + are scheme-dependent. They usually involve lowercasing the scheme + and Internet host name components, removing the explicit port + specification if it matches the default port, uppercasing all escape + sequences, and unescaping octets that can be better represented as + plain characters. + + For efficiency reasons, if the $uri is already in normalized form, + then a reference to it is returned instead of a copy. + + $uri->eq( $other_uri ) + URI::eq( $first_uri, $other_uri ) + Tests whether two URI references are equal. URI references that + normalize to the same string are considered equal. The method can + also be used as a plain function which can also test two string + arguments. + + If you need to test whether two "URI" object references denote the + same object, use the '==' operator. + + $uri->abs( $base_uri ) + Returns an absolute URI reference. If $uri is already absolute, then + a reference to it is simply returned. If the $uri is relative, then + a new absolute URI is constructed by combining the $uri and the + $base_uri, and returned. + + $uri->rel( $base_uri ) + Returns a relative URI reference if it is possible to make one that + denotes the same resource relative to $base_uri. If not, then $uri + is simply returned. + + $uri->secure + Returns a TRUE value if the URI is considered to point to a resource + on a secure channel, such as an SSL or TLS encrypted one. + +GENERIC METHODS + The following methods are available to schemes that use the + common/generic syntax for hierarchical namespaces. The descriptions of + schemes below indicate which these are. Unrecognized schemes are assumed + to support the generic syntax, and therefore the following methods: + + $uri->authority + $uri->authority( $new_authority ) + Sets and returns the escaped authority component of the $uri. + + $uri->path + $uri->path( $new_path ) + Sets and returns the escaped path component of the $uri (the part + between the host name and the query or fragment). The path can never + be undefined, but it can be the empty string. + + $uri->path_query + $uri->path_query( $new_path_query ) + Sets and returns the escaped path and query components as a single + entity. The path and the query are separated by a "?" character, but + the query can itself contain "?". + + $uri->path_segments + $uri->path_segments( $segment, ... ) + Sets and returns the path. In a scalar context, it returns the same + value as $uri->path. In a list context, it returns the unescaped + path segments that make up the path. Path segments that have + parameters are returned as an anonymous array. The first element is + the unescaped path segment proper; subsequent elements are escaped + parameter strings. Such an anonymous array uses overloading so it + can be treated as a string too, but this string does not include the + parameters. + + Note that absolute paths have the empty string as their first + *path_segment*, i.e. the *path* "/foo/bar" have 3 *path_segments*; + "", "foo" and "bar". + + $uri->query + $uri->query( $new_query ) + Sets and returns the escaped query component of the $uri. + + $uri->query_form + $uri->query_form( $key1 => $val1, $key2 => $val2, ... ) + $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim ) + $uri->query_form( \@key_value_pairs ) + $uri->query_form( \@key_value_pairs, $delim ) + $uri->query_form( \%hash ) + $uri->query_form( \%hash, $delim ) + Sets and returns query components that use the + *application/x-www-form-urlencoded* format. Key/value pairs are + separated by "&", and the key is separated from the value by a "=" + character. + + The form can be set either by passing separate key/value pairs, or + via an array or hash reference. Passing an empty array or an empty + hash removes the query component, whereas passing no arguments at + all leaves the component unchanged. The order of keys is undefined + if a hash reference is passed. The old value is always returned as a + list of separate key/value pairs. Assigning this list to a hash is + unwise as the keys returned might repeat. + + The values passed when setting the form can be plain strings or + references to arrays of strings. Passing an array of values has the + same effect as passing the key repeatedly with one value at a time. + All the following statements have the same effect: + + $uri->query_form(foo => 1, foo => 2); + $uri->query_form(foo => [1, 2]); + $uri->query_form([ foo => 1, foo => 2 ]); + $uri->query_form([ foo => [1, 2] ]); + $uri->query_form({ foo => [1, 2] }); + + The $delim parameter can be passed as ";" to force the key/value + pairs to be delimited by ";" instead of "&" in the query string. + This practice is often recommended for URLs embedded in HTML or XML + documents as this avoids the trouble of escaping the "&" character. + You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable + to ";" for the same global effect. + + The "URI::QueryParam" module can be loaded to add further methods to + manipulate the form of a URI. See URI::QueryParam for details. + + $uri->query_keywords + $uri->query_keywords( $keywords, ... ) + $uri->query_keywords( \@keywords ) + Sets and returns query components that use the keywords separated by + "+" format. + + The keywords can be set either by passing separate keywords directly + or by passing a reference to an array of keywords. Passing an empty + array removes the query component, whereas passing no arguments at + all leaves the component unchanged. The old value is always returned + as a list of separate words. + +SERVER METHODS + For schemes where the *authority* component denotes an Internet host, + the following methods are available in addition to the generic methods. + + $uri->userinfo + $uri->userinfo( $new_userinfo ) + Sets and returns the escaped userinfo part of the authority + component. + + For some schemes this is a user name and a password separated by a + colon. This practice is not recommended. Embedding passwords in + clear text (such as URI) has proven to be a security risk in almost + every case where it has been used. + + $uri->host + $uri->host( $new_host ) + Sets and returns the unescaped hostname. + + If the $new_host string ends with a colon and a number, then this + number also sets the port. + + For IPv6 addresses the brackets around the raw address is removed in + the return value from $uri->host. When setting the host attribute to + an IPv6 address you can use a raw address or one enclosed in + brackets. The address needs to be enclosed in brackets if you want + to pass in a new port value as well. + + $uri->ihost + Returns the host in Unicode form. Any IDNA A-labels are turned into + U-labels. + + $uri->port + $uri->port( $new_port ) + Sets and returns the port. The port is a simple integer that should + be greater than 0. + + If a port is not specified explicitly in the URI, then the URI + scheme's default port is returned. If you don't want the default + port substituted, then you can use the $uri->_port method instead. + + $uri->host_port + $uri->host_port( $new_host_port ) + Sets and returns the host and port as a single unit. The returned + value includes a port, even if it matches the default port. The host + part and the port part are separated by a colon: ":". + + For IPv6 addresses the bracketing is preserved; thus + URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast + this with $uri->host which will remove the brackets. + + $uri->default_port + Returns the default port of the URI scheme to which $uri belongs. + For *http* this is the number 80, for *ftp* this is the number 21, + etc. The default port for a scheme can not be changed. + +SCHEME-SPECIFIC SUPPORT + Scheme-specific support is provided for the following URI schemes. For + "URI" objects that do not belong to one of these, you can only use the + common and generic methods. + + data: + The *data* URI scheme is specified in RFC 2397. It allows inclusion + of small data items as "immediate" data, as if it had been included + externally. + + "URI" objects belonging to the data scheme support the common + methods and two new methods to access their scheme-specific + components: $uri->media_type and $uri->data. See URI::data for + details. + + file: + An old specification of the *file* URI scheme is found in RFC 1738. + A new RFC 2396 based specification in not available yet, but file + URI references are in common use. + + "URI" objects belonging to the file scheme support the common and + generic methods. In addition, they provide two methods for mapping + file URIs back to local file names; $uri->file and $uri->dir. See + URI::file for details. + + ftp: + An old specification of the *ftp* URI scheme is found in RFC 1738. A + new RFC 2396 based specification in not available yet, but ftp URI + references are in common use. + + "URI" objects belonging to the ftp scheme support the common, + generic and server methods. In addition, they provide two methods + for accessing the userinfo sub-components: $uri->user and + $uri->password. + + gopher: + The *gopher* URI scheme is specified in + <draft-murali-url-gopher-1996-12-04> and will hopefully be available + as a RFC 2396 based specification. + + "URI" objects belonging to the gopher scheme support the common, + generic and server methods. In addition, they support some methods + for accessing gopher-specific path components: $uri->gopher_type, + $uri->selector, $uri->search, $uri->string. + + http: + The *http* URI scheme is specified in RFC 2616. The scheme is used + to reference resources hosted by HTTP servers. + + "URI" objects belonging to the http scheme support the common, + generic and server methods. + + https: + The *https* URI scheme is a Netscape invention which is commonly + implemented. The scheme is used to reference HTTP servers through + SSL connections. Its syntax is the same as http, but the default + port is different. + + ldap: + The *ldap* URI scheme is specified in RFC 2255. LDAP is the + Lightweight Directory Access Protocol. An ldap URI describes an LDAP + search operation to perform to retrieve information from an LDAP + directory. + + "URI" objects belonging to the ldap scheme support the common, + generic and server methods as well as ldap-specific methods: + $uri->dn, $uri->attributes, $uri->scope, $uri->filter, + $uri->extensions. See URI::ldap for details. + + ldapi: + Like the *ldap* URI scheme, but uses a UNIX domain socket. The + server methods are not supported, and the local socket path is + available as $uri->un_path. The *ldapi* scheme is used by the + OpenLDAP package. There is no real specification for it, but it is + mentioned in various OpenLDAP manual pages. + + ldaps: + Like the *ldap* URI scheme, but uses an SSL connection. This scheme + is deprecated, as the preferred way is to use the *start_tls* + mechanism. + + mailto: + The *mailto* URI scheme is specified in RFC 2368. The scheme was + originally used to designate the Internet mailing address of an + individual or service. It has (in RFC 2368) been extended to allow + setting of other mail header fields and the message body. + + "URI" objects belonging to the mailto scheme support the common + methods and the generic query methods. In addition, they support the + following mailto-specific methods: $uri->to, $uri->headers. + + Note that the "foo@example.com" part of a mailto is *not* the + "userinfo" and "host" but instead the "path". This allows a mailto + URI to contain multiple comma separated email addresses. + + mms: + The *mms* URL specification can be found at <http://sdp.ppona.com/>. + "URI" objects belonging to the mms scheme support the common, + generic, and server methods, with the exception of userinfo and + query-related sub-components. + + news: + The *news*, *nntp* and *snews* URI schemes are specified in + <draft-gilman-news-url-01> and will hopefully be available as an RFC + 2396 based specification soon. + + "URI" objects belonging to the news scheme support the common, + generic and server methods. In addition, they provide some methods + to access the path: $uri->group and $uri->message. + + nntp: + See *news* scheme. + + pop: + The *pop* URI scheme is specified in RFC 2384. The scheme is used to + reference a POP3 mailbox. + + "URI" objects belonging to the pop scheme support the common, + generic and server methods. In addition, they provide two methods to + access the userinfo components: $uri->user and $uri->auth + + rlogin: + An old specification of the *rlogin* URI scheme is found in RFC + 1738. "URI" objects belonging to the rlogin scheme support the + common, generic and server methods. + + rtsp: + The *rtsp* URL specification can be found in section 3.2 of RFC + 2326. "URI" objects belonging to the rtsp scheme support the common, + generic, and server methods, with the exception of userinfo and + query-related sub-components. + + rtspu: + The *rtspu* URI scheme is used to talk to RTSP servers over UDP + instead of TCP. The syntax is the same as rtsp. + + rsync: + Information about rsync is available from <http://rsync.samba.org/>. + "URI" objects belonging to the rsync scheme support the common, + generic and server methods. In addition, they provide methods to + access the userinfo sub-components: $uri->user and $uri->password. + + sip: + The *sip* URI specification is described in sections 19.1 and 25 of + RFC 3261. "URI" objects belonging to the sip scheme support the + common, generic, and server methods with the exception of path + related sub-components. In addition, they provide two methods to get + and set *sip* parameters: $uri->params_form and $uri->params. + + sips: + See *sip* scheme. Its syntax is the same as sip, but the default + port is different. + + snews: + See *news* scheme. Its syntax is the same as news, but the default + port is different. + + telnet: + An old specification of the *telnet* URI scheme is found in RFC + 1738. "URI" objects belonging to the telnet scheme support the + common, generic and server methods. + + tn3270: + These URIs are used like *telnet* URIs but for connections to IBM + mainframes. "URI" objects belonging to the tn3270 scheme support the + common, generic and server methods. + + ssh: + Information about ssh is available at <http://www.openssh.com/>. + "URI" objects belonging to the ssh scheme support the common, + generic and server methods. In addition, they provide methods to + access the userinfo sub-components: $uri->user and $uri->password. + + sftp: + "URI" objects belonging to the sftp scheme support the common, + generic and server methods. In addition, they provide methods to + access the userinfo sub-components: $uri->user and $uri->password. + + urn: + The syntax of Uniform Resource Names is specified in RFC 2141. "URI" + objects belonging to the urn scheme provide the common methods, and + also the methods $uri->nid and $uri->nss, which return the Namespace + Identifier and the Namespace-Specific String respectively. + + The Namespace Identifier basically works like the Scheme identifier + of URIs, and further divides the URN namespace. Namespace Identifier + assignments are maintained at + <http://www.iana.org/assignments/urn-namespaces>. + + Letter case is not significant for the Namespace Identifier. It is + always returned in lower case by the $uri->nid method. The + $uri->_nid method can be used if you want it in its original case. + + urn:isbn: + The "urn:isbn:" namespace contains International Standard Book + Numbers (ISBNs) and is described in RFC 3187. A "URI" object + belonging to this namespace has the following extra methods (if the + Business::ISBN module is available): $uri->isbn, + $uri->isbn_publisher_code, $uri->isbn_group_code (formerly + isbn_country_code, which is still supported by issues a deprecation + warning), $uri->isbn_as_ean. + + urn:oid: + The "urn:oid:" namespace contains Object Identifiers (OIDs) and is + described in RFC 3061. An object identifier consists of sequences of + digits separated by dots. A "URI" object belonging to this namespace + has an additional method called $uri->oid that can be used to + get/set the oid value. In a list context, oid numbers are returned + as separate elements. + +CONFIGURATION VARIABLES + The following configuration variables influence how the class and its + methods behave: + + $URI::ABS_ALLOW_RELATIVE_SCHEME + Some older parsers used to allow the scheme name to be present in + the relative URL if it was the same as the base URL scheme. RFC 2396 + says that this should be avoided, but you can enable this old + behaviour by setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to + a TRUE value. The difference is demonstrated by the following + examples: + + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:foo" + + local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:/host/a/foo" + + $URI::ABS_REMOTE_LEADING_DOTS + You can also have the abs() method ignore excess ".." segments in + the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS to a TRUE + value. The difference is demonstrated by the following examples: + + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/../../foo" + + local $URI::ABS_REMOTE_LEADING_DOTS = 1; + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/foo" + + $URI::DEFAULT_QUERY_FORM_DELIMITER + This value can be set to ";" to have the query form "key=value" + pairs delimited by ";" instead of "&" which is the default. + +BUGS + There are some things that are not quite right: + + * Using regexp variables like $1 directly as arguments to the URI + accessor methods does not work too well with current perl + implementations. I would argue that this is actually a bug in perl. + The workaround is to quote them. Example: + + /(...)/ || die; + $u->query("$1"); + + * The escaping (percent encoding) of chars in the 128 .. 255 range + passed to the URI constructor or when setting URI parts using the + accessor methods depend on the state of the internal UTF8 flag (see + utf8::is_utf8) of the string passed. If the UTF8 flag is set the + UTF-8 encoded version of the character is percent encoded. If the + UTF8 flag isn't set the Latin-1 version (byte) of the character is + percent encoded. This basically exposes the internal encoding of + Perl strings. + +PARSING URIs WITH REGEXP + As an alternative to this module, the following (official) regular + expression can be used to decode a URI: + + my($scheme, $authority, $path, $query, $fragment) = + $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + + The "URI::Split" module provides the function uri_split() as a readable + alternative. + +SEE ALSO + URI::file, URI::WithBase, URI::QueryParam, URI::Escape, URI::Split, + URI::Heuristic + + RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax", + Berners-Lee, Fielding, Masinter, August 1998. + + <http://www.iana.org/assignments/uri-schemes> + + <http://www.iana.org/assignments/urn-namespaces> + + <http://www.w3.org/Addressing/> + +COPYRIGHT + Copyright 1995-2009 Gisle Aas. + + Copyright 1995 Martijn Koster. + + This program is free software; you can redistribute it and/or modify it + under the same terms as Perl itself. + +AUTHORS / ACKNOWLEDGMENTS + This module is based on the "URI::URL" module, which in turn was + (distantly) based on the "wwwurl.pl" code in the libwww-perl for perl4 + developed by Roy Fielding, as part of the Arcadia project at the + University of California, Irvine, with contributions from Brooks Cutter. + + "URI::URL" was developed by Gisle Aas, Tim Bunce, Roy Fielding and + Martijn Koster with input from other people on the libwww-perl mailing + list. + + "URI" and related subclasses was developed by Gisle Aas. + diff --git a/lib/URI.pm b/lib/URI.pm new file mode 100644 index 0000000..98cd575 --- /dev/null +++ b/lib/URI.pm @@ -0,0 +1,1155 @@ +package URI; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +our ($ABS_REMOTE_LEADING_DOTS, $ABS_ALLOW_RELATIVE_SCHEME, $DEFAULT_QUERY_FORM_DELIMITER); + +my %implements; # mapping from scheme to implementor class + +# Some "official" character classes + +our $reserved = q(;/?:@&=+$,[]); +our $mark = q(-_.!~*'()); #'; emacs +our $unreserved = "A-Za-z0-9\Q$mark\E"; +our $uric = quotemeta($reserved) . $unreserved . "%"; + +our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*'; + +use Carp (); +use URI::Escape (); + +use overload ('""' => sub { ${$_[0]} }, + '==' => sub { _obj_eq(@_) }, + '!=' => sub { !_obj_eq(@_) }, + fallback => 1, + ); + +# Check if two objects are the same object +sub _obj_eq { + return overload::StrVal($_[0]) eq overload::StrVal($_[1]); +} + +sub new +{ + my($class, $uri, $scheme) = @_; + + $uri = defined ($uri) ? "$uri" : ""; # stringify + # Get rid of potential wrapping + $uri =~ s/^<(?:URL:)?(.*)>$/$1/; # + $uri =~ s/^"(.*)"$/$1/; + $uri =~ s/^\s+//; + $uri =~ s/\s+$//; + + my $impclass; + if ($uri =~ m/^($scheme_re):/so) { + $scheme = $1; + } + else { + if (($impclass = ref($scheme))) { + $scheme = $scheme->scheme; + } + elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) { + $scheme = $1; + } + } + $impclass ||= implementor($scheme) || + do { + require URI::_foreign; + $impclass = 'URI::_foreign'; + }; + + return $impclass->_init($uri, $scheme); +} + + +sub new_abs +{ + my($class, $uri, $base) = @_; + $uri = $class->new($uri, $base); + $uri->abs($base); +} + + +sub _init +{ + my $class = shift; + my($str, $scheme) = @_; + # find all funny characters and encode the bytes. + $str = $class->_uric_escape($str); + $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o || + $class->_no_scheme_ok; + my $self = bless \$str, $class; + $self; +} + + +sub _uric_escape +{ + my($class, $str) = @_; + $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego; + utf8::downgrade($str); + return $str; +} + + +sub implementor +{ + my($scheme, $impclass) = @_; + if (!$scheme || $scheme !~ /\A$scheme_re\z/o) { + require URI::_generic; + return "URI::_generic"; + } + + $scheme = lc($scheme); + + if ($impclass) { + # Set the implementor class for a given scheme + my $old = $implements{$scheme}; + $impclass->_init_implementor($scheme); + $implements{$scheme} = $impclass; + return $old; + } + + my $ic = $implements{$scheme}; + return $ic if $ic; + + # scheme not yet known, look for internal or + # preloaded (with 'use') implementation + $ic = "URI::$scheme"; # default location + + # turn scheme into a valid perl identifier by a simple transformation... + $ic =~ s/\+/_P/g; + $ic =~ s/\./_O/g; + $ic =~ s/\-/_/g; + + no strict 'refs'; + # check we actually have one for the scheme: + unless (@{"${ic}::ISA"}) { + # Try to load it + eval "require $ic"; + die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/; + return undef unless @{"${ic}::ISA"}; + } + + $ic->_init_implementor($scheme); + $implements{$scheme} = $ic; + $ic; +} + + +sub _init_implementor +{ + my($class, $scheme) = @_; + # Remember that one implementor class may actually + # serve to implement several URI schemes. +} + + +sub clone +{ + my $self = shift; + my $other = $$self; + bless \$other, ref $self; +} + +sub TO_JSON { ${$_[0]} } + +sub _no_scheme_ok { 0 } + +sub _scheme +{ + my $self = shift; + + unless (@_) { + return undef unless $$self =~ /^($scheme_re):/o; + return $1; + } + + my $old; + my $new = shift; + if (defined($new) && length($new)) { + Carp::croak("Bad scheme '$new'") unless $new =~ /^$scheme_re$/o; + $old = $1 if $$self =~ s/^($scheme_re)://o; + my $newself = URI->new("$new:$$self"); + $$self = $$newself; + bless $self, ref($newself); + } + else { + if ($self->_no_scheme_ok) { + $old = $1 if $$self =~ s/^($scheme_re)://o; + Carp::carp("Oops, opaque part now look like scheme") + if $^W && $$self =~ m/^$scheme_re:/o + } + else { + $old = $1 if $$self =~ m/^($scheme_re):/o; + } + } + + return $old; +} + +sub scheme +{ + my $scheme = shift->_scheme(@_); + return undef unless defined $scheme; + lc($scheme); +} + +sub has_recognized_scheme { + my $self = shift; + return ref($self) !~ /^URI::_(?:foreign|generic)\z/; +} + +sub opaque +{ + my $self = shift; + + unless (@_) { + $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die; + return $1; + } + + $$self =~ /^($scheme_re:)? # optional scheme + ([^\#]*) # opaque + (\#.*)? # optional fragment + $/sx or die; + + my $old_scheme = $1; + my $old_opaque = $2; + my $old_frag = $3; + + my $new_opaque = shift; + $new_opaque = "" unless defined $new_opaque; + $new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_opaque); + + $$self = defined($old_scheme) ? $old_scheme : ""; + $$self .= $new_opaque; + $$self .= $old_frag if defined $old_frag; + + $old_opaque; +} + +sub path { goto &opaque } # alias + + +sub fragment +{ + my $self = shift; + unless (@_) { + return undef unless $$self =~ /\#(.*)/s; + return $1; + } + + my $old; + $old = $1 if $$self =~ s/\#(.*)//s; + + my $new_frag = shift; + if (defined $new_frag) { + $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego; + utf8::downgrade($new_frag); + $$self .= "#$new_frag"; + } + $old; +} + + +sub as_string +{ + my $self = shift; + $$self; +} + + +sub as_iri +{ + my $self = shift; + my $str = $$self; + if ($str =~ s/%([89a-fA-F][0-9a-fA-F])/chr(hex($1))/eg) { + # All this crap because the more obvious: + # + # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift }) + # + # doesn't work before Encode 2.39. Wait for a standard release + # to bundle that version. + + require Encode; + my $enc = Encode::find_encoding("UTF-8"); + my $u = ""; + while (length $str) { + $u .= $enc->decode($str, Encode::FB_QUIET()); + if (length $str) { + # escape next char + $u .= URI::Escape::escape_char(substr($str, 0, 1, "")); + } + } + $str = $u; + } + return $str; +} + + +sub canonical +{ + # Make sure scheme is lowercased, that we don't escape unreserved chars, + # and that we use upcase escape sequences. + + my $self = shift; + my $scheme = $self->_scheme || ""; + my $uc_scheme = $scheme =~ /[A-Z]/; + my $esc = $$self =~ /%[a-fA-F0-9]{2}/; + return $self unless $uc_scheme || $esc; + + my $other = $self->clone; + if ($uc_scheme) { + $other->_scheme(lc $scheme); + } + if ($esc) { + $$other =~ s{%([0-9a-fA-F]{2})} + { my $a = chr(hex($1)); + $a =~ /^[$unreserved]\z/o ? $a : "%\U$1" + }ge; + } + return $other; +} + +# Compare two URIs, subclasses will provide a more correct implementation +sub eq { + my($self, $other) = @_; + $self = URI->new($self, $other) unless ref $self; + $other = URI->new($other, $self) unless ref $other; + ref($self) eq ref($other) && # same class + $self->canonical->as_string eq $other->canonical->as_string; +} + +# generic-URI transformation methods +sub abs { $_[0]; } +sub rel { $_[0]; } + +sub secure { 0 } + +# help out Storable +sub STORABLE_freeze { + my($self, $cloning) = @_; + return $$self; +} + +sub STORABLE_thaw { + my($self, $cloning, $str) = @_; + $$self = $str; +} + +1; + +__END__ + +=head1 NAME + +URI - Uniform Resource Identifiers (absolute and relative) + +=head1 SYNOPSIS + + $u1 = URI->new("http://www.perl.com"); + $u2 = URI->new("foo", "http"); + $u3 = $u2->abs($u1); + $u4 = $u3->clone; + $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical; + + $str = $u->as_string; + $str = "$u"; + + $scheme = $u->scheme; + $opaque = $u->opaque; + $path = $u->path; + $frag = $u->fragment; + + $u->scheme("ftp"); + $u->host("ftp.perl.com"); + $u->path("cpan/"); + +=head1 DESCRIPTION + +This module implements the C<URI> class. Objects of this class +represent "Uniform Resource Identifier references" as specified in RFC +2396 (and updated by RFC 2732). + +A Uniform Resource Identifier is a compact string of characters that +identifies an abstract or physical resource. A Uniform Resource +Identifier can be further classified as either a Uniform Resource Locator +(URL) or a Uniform Resource Name (URN). The distinction between URL +and URN does not matter to the C<URI> class interface. A +"URI-reference" is a URI that may have additional information attached +in the form of a fragment identifier. + +An absolute URI reference consists of three parts: a I<scheme>, a +I<scheme-specific part> and a I<fragment> identifier. A subset of URI +references share a common syntax for hierarchical namespaces. For +these, the scheme-specific part is further broken down into +I<authority>, I<path> and I<query> components. These URIs can also +take the form of relative URI references, where the scheme (and +usually also the authority) component is missing, but implied by the +context of the URI reference. The three forms of URI reference +syntax are summarized as follows: + + <scheme>:<scheme-specific-part>#<fragment> + <scheme>://<authority><path>?<query>#<fragment> + <path>?<query>#<fragment> + +The components into which a URI reference can be divided depend on the +I<scheme>. The C<URI> class provides methods to get and set the +individual components. The methods available for a specific +C<URI> object depend on the scheme. + +=head1 CONSTRUCTORS + +The following methods construct new C<URI> objects: + +=over 4 + +=item $uri = URI->new( $str ) + +=item $uri = URI->new( $str, $scheme ) + +Constructs a new URI object. The string +representation of a URI is given as argument, together with an optional +scheme specification. Common URI wrappers like "" and <>, as well as +leading and trailing white space, are automatically removed from +the $str argument before it is processed further. + +The constructor determines the scheme, maps this to an appropriate +URI subclass, constructs a new object of that class and returns it. + +If the scheme isn't one of those that URI recognizes, you still get +an URI object back that you can access the generic methods on. The +C<< $uri->has_recognized_scheme >> method can be used to test for +this. + +The $scheme argument is only used when $str is a +relative URI. It can be either a simple string that +denotes the scheme, a string containing an absolute URI reference, or +an absolute C<URI> object. If no $scheme is specified for a relative +URI $str, then $str is simply treated as a generic URI (no scheme-specific +methods available). + +The set of characters available for building URI references is +restricted (see L<URI::Escape>). Characters outside this set are +automatically escaped by the URI constructor. + +=item $uri = URI->new_abs( $str, $base_uri ) + +Constructs a new absolute URI object. The $str argument can +denote a relative or absolute URI. If relative, then it is +absolutized using $base_uri as base. The $base_uri must be an absolute +URI. + +=item $uri = URI::file->new( $filename ) + +=item $uri = URI::file->new( $filename, $os ) + +Constructs a new I<file> URI from a file name. See L<URI::file>. + +=item $uri = URI::file->new_abs( $filename ) + +=item $uri = URI::file->new_abs( $filename, $os ) + +Constructs a new absolute I<file> URI from a file name. See +L<URI::file>. + +=item $uri = URI::file->cwd + +Returns the current working directory as a I<file> URI. See +L<URI::file>. + +=item $uri->clone + +Returns a copy of the $uri. + +=back + +=head1 COMMON METHODS + +The methods described in this section are available for all C<URI> +objects. + +Methods that give access to components of a URI always return the +old value of the component. The value returned is C<undef> if the +component was not present. There is generally a difference between a +component that is empty (represented as C<"">) and a component that is +missing (represented as C<undef>). If an accessor method is given an +argument, it updates the corresponding component in addition to +returning the old value of the component. Passing an undefined +argument removes the component (if possible). The description of +each accessor method indicates whether the component is passed as +an escaped (percent-encoded) or an unescaped string. A component that can be further +divided into sub-parts are usually passed escaped, as unescaping might +change its semantics. + +The common methods available for all URI are: + +=over 4 + +=item $uri->scheme + +=item $uri->scheme( $new_scheme ) + +Sets and returns the scheme part of the $uri. If the $uri is +relative, then $uri->scheme returns C<undef>. If called with an +argument, it updates the scheme of $uri, possibly changing the +class of $uri, and returns the old scheme value. The method croaks +if the new scheme name is illegal; a scheme name must begin with a +letter and must consist of only US-ASCII letters, numbers, and a few +special marks: ".", "+", "-". This restriction effectively means +that the scheme must be passed unescaped. Passing an undefined +argument to the scheme method makes the URI relative (if possible). + +Letter case does not matter for scheme names. The string +returned by $uri->scheme is always lowercase. If you want the scheme +just as it was written in the URI in its original case, +you can use the $uri->_scheme method instead. + +=item $uri->has_recognized_scheme + +Returns TRUE if the URI scheme is one that URI recognizes. + +It will also be TRUE for relative URLs where a recognized +scheme was provided to the constructor, even if C<< $uri->scheme >> +returns C<undef> for these. + +=item $uri->opaque + +=item $uri->opaque( $new_opaque ) + +Sets and returns the scheme-specific part of the $uri +(everything between the scheme and the fragment) +as an escaped string. + +=item $uri->path + +=item $uri->path( $new_path ) + +Sets and returns the same value as $uri->opaque unless the URI +supports the generic syntax for hierarchical namespaces. +In that case the generic method is overridden to set and return +the part of the URI between the I<host name> and the I<fragment>. + +=item $uri->fragment + +=item $uri->fragment( $new_frag ) + +Returns the fragment identifier of a URI reference +as an escaped string. + +=item $uri->as_string + +Returns a URI object to a plain ASCII string. URI objects are +also converted to plain strings automatically by overloading. This +means that $uri objects can be used as plain strings in most Perl +constructs. + +=item $uri->as_iri + +Returns a Unicode string representing the URI. Escaped UTF-8 sequences +representing non-ASCII characters are turned into their corresponding Unicode +code point. + +=item $uri->canonical + +Returns a normalized version of the URI. The rules +for normalization are scheme-dependent. They usually involve +lowercasing the scheme and Internet host name components, +removing the explicit port specification if it matches the default port, +uppercasing all escape sequences, and unescaping octets that can be +better represented as plain characters. + +For efficiency reasons, if the $uri is already in normalized form, +then a reference to it is returned instead of a copy. + +=item $uri->eq( $other_uri ) + +=item URI::eq( $first_uri, $other_uri ) + +Tests whether two URI references are equal. URI references +that normalize to the same string are considered equal. The method +can also be used as a plain function which can also test two string +arguments. + +If you need to test whether two C<URI> object references denote the +same object, use the '==' operator. + +=item $uri->abs( $base_uri ) + +Returns an absolute URI reference. If $uri is already +absolute, then a reference to it is simply returned. If the $uri +is relative, then a new absolute URI is constructed by combining the +$uri and the $base_uri, and returned. + +=item $uri->rel( $base_uri ) + +Returns a relative URI reference if it is possible to +make one that denotes the same resource relative to $base_uri. +If not, then $uri is simply returned. + +=item $uri->secure + +Returns a TRUE value if the URI is considered to point to a resource on +a secure channel, such as an SSL or TLS encrypted one. + +=back + +=head1 GENERIC METHODS + +The following methods are available to schemes that use the +common/generic syntax for hierarchical namespaces. The descriptions of +schemes below indicate which these are. Unrecognized schemes are +assumed to support the generic syntax, and therefore the following +methods: + +=over 4 + +=item $uri->authority + +=item $uri->authority( $new_authority ) + +Sets and returns the escaped authority component +of the $uri. + +=item $uri->path + +=item $uri->path( $new_path ) + +Sets and returns the escaped path component of +the $uri (the part between the host name and the query or fragment). +The path can never be undefined, but it can be the empty string. + +=item $uri->path_query + +=item $uri->path_query( $new_path_query ) + +Sets and returns the escaped path and query +components as a single entity. The path and the query are +separated by a "?" character, but the query can itself contain "?". + +=item $uri->path_segments + +=item $uri->path_segments( $segment, ... ) + +Sets and returns the path. In a scalar context, it returns +the same value as $uri->path. In a list context, it returns the +unescaped path segments that make up the path. Path segments that +have parameters are returned as an anonymous array. The first element +is the unescaped path segment proper; subsequent elements are escaped +parameter strings. Such an anonymous array uses overloading so it can +be treated as a string too, but this string does not include the +parameters. + +Note that absolute paths have the empty string as their first +I<path_segment>, i.e. the I<path> C</foo/bar> have 3 +I<path_segments>; "", "foo" and "bar". + +=item $uri->query + +=item $uri->query( $new_query ) + +Sets and returns the escaped query component of +the $uri. + +=item $uri->query_form + +=item $uri->query_form( $key1 => $val1, $key2 => $val2, ... ) + +=item $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim ) + +=item $uri->query_form( \@key_value_pairs ) + +=item $uri->query_form( \@key_value_pairs, $delim ) + +=item $uri->query_form( \%hash ) + +=item $uri->query_form( \%hash, $delim ) + +Sets and returns query components that use the +I<application/x-www-form-urlencoded> format. Key/value pairs are +separated by "&", and the key is separated from the value by a "=" +character. + +The form can be set either by passing separate key/value pairs, or via +an array or hash reference. Passing an empty array or an empty hash +removes the query component, whereas passing no arguments at all leaves +the component unchanged. The order of keys is undefined if a hash +reference is passed. The old value is always returned as a list of +separate key/value pairs. Assigning this list to a hash is unwise as +the keys returned might repeat. + +The values passed when setting the form can be plain strings or +references to arrays of strings. Passing an array of values has the +same effect as passing the key repeatedly with one value at a time. +All the following statements have the same effect: + + $uri->query_form(foo => 1, foo => 2); + $uri->query_form(foo => [1, 2]); + $uri->query_form([ foo => 1, foo => 2 ]); + $uri->query_form([ foo => [1, 2] ]); + $uri->query_form({ foo => [1, 2] }); + +The $delim parameter can be passed as ";" to force the key/value pairs +to be delimited by ";" instead of "&" in the query string. This +practice is often recommended for URLs embedded in HTML or XML +documents as this avoids the trouble of escaping the "&" character. +You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable to +";" for the same global effect. + +The C<URI::QueryParam> module can be loaded to add further methods to +manipulate the form of a URI. See L<URI::QueryParam> for details. + +=item $uri->query_keywords + +=item $uri->query_keywords( $keywords, ... ) + +=item $uri->query_keywords( \@keywords ) + +Sets and returns query components that use the +keywords separated by "+" format. + +The keywords can be set either by passing separate keywords directly +or by passing a reference to an array of keywords. Passing an empty +array removes the query component, whereas passing no arguments at +all leaves the component unchanged. The old value is always returned +as a list of separate words. + +=back + +=head1 SERVER METHODS + +For schemes where the I<authority> component denotes an Internet host, +the following methods are available in addition to the generic +methods. + +=over 4 + +=item $uri->userinfo + +=item $uri->userinfo( $new_userinfo ) + +Sets and returns the escaped userinfo part of the +authority component. + +For some schemes this is a user name and a password separated by +a colon. This practice is not recommended. Embedding passwords in +clear text (such as URI) has proven to be a security risk in almost +every case where it has been used. + +=item $uri->host + +=item $uri->host( $new_host ) + +Sets and returns the unescaped hostname. + +If the $new_host string ends with a colon and a number, then this +number also sets the port. + +For IPv6 addresses the brackets around the raw address is removed in the return +value from $uri->host. When setting the host attribute to an IPv6 address you +can use a raw address or one enclosed in brackets. The address needs to be +enclosed in brackets if you want to pass in a new port value as well. + +=item $uri->ihost + +Returns the host in Unicode form. Any IDNA A-labels are turned into U-labels. + +=item $uri->port + +=item $uri->port( $new_port ) + +Sets and returns the port. The port is a simple integer +that should be greater than 0. + +If a port is not specified explicitly in the URI, then the URI scheme's default port +is returned. If you don't want the default port +substituted, then you can use the $uri->_port method instead. + +=item $uri->host_port + +=item $uri->host_port( $new_host_port ) + +Sets and returns the host and port as a single +unit. The returned value includes a port, even if it matches the +default port. The host part and the port part are separated by a +colon: ":". + +For IPv6 addresses the bracketing is preserved; thus +URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast this with +$uri->host which will remove the brackets. + +=item $uri->default_port + +Returns the default port of the URI scheme to which $uri +belongs. For I<http> this is the number 80, for I<ftp> this +is the number 21, etc. The default port for a scheme can not be +changed. + +=back + +=head1 SCHEME-SPECIFIC SUPPORT + +Scheme-specific support is provided for the following URI schemes. For C<URI> +objects that do not belong to one of these, you can only use the common and +generic methods. + +=over 4 + +=item B<data>: + +The I<data> URI scheme is specified in RFC 2397. It allows inclusion +of small data items as "immediate" data, as if it had been included +externally. + +C<URI> objects belonging to the data scheme support the common methods +and two new methods to access their scheme-specific components: +$uri->media_type and $uri->data. See L<URI::data> for details. + +=item B<file>: + +An old specification of the I<file> URI scheme is found in RFC 1738. +A new RFC 2396 based specification in not available yet, but file URI +references are in common use. + +C<URI> objects belonging to the file scheme support the common and +generic methods. In addition, they provide two methods for mapping file URIs +back to local file names; $uri->file and $uri->dir. See L<URI::file> +for details. + +=item B<ftp>: + +An old specification of the I<ftp> URI scheme is found in RFC 1738. A +new RFC 2396 based specification in not available yet, but ftp URI +references are in common use. + +C<URI> objects belonging to the ftp scheme support the common, +generic and server methods. In addition, they provide two methods for +accessing the userinfo sub-components: $uri->user and $uri->password. + +=item B<gopher>: + +The I<gopher> URI scheme is specified in +<draft-murali-url-gopher-1996-12-04> and will hopefully be available +as a RFC 2396 based specification. + +C<URI> objects belonging to the gopher scheme support the common, +generic and server methods. In addition, they support some methods for +accessing gopher-specific path components: $uri->gopher_type, +$uri->selector, $uri->search, $uri->string. + +=item B<http>: + +The I<http> URI scheme is specified in RFC 2616. +The scheme is used to reference resources hosted by HTTP servers. + +C<URI> objects belonging to the http scheme support the common, +generic and server methods. + +=item B<https>: + +The I<https> URI scheme is a Netscape invention which is commonly +implemented. The scheme is used to reference HTTP servers through SSL +connections. Its syntax is the same as http, but the default +port is different. + +=item B<ldap>: + +The I<ldap> URI scheme is specified in RFC 2255. LDAP is the +Lightweight Directory Access Protocol. An ldap URI describes an LDAP +search operation to perform to retrieve information from an LDAP +directory. + +C<URI> objects belonging to the ldap scheme support the common, +generic and server methods as well as ldap-specific methods: $uri->dn, +$uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See +L<URI::ldap> for details. + +=item B<ldapi>: + +Like the I<ldap> URI scheme, but uses a UNIX domain socket. The +server methods are not supported, and the local socket path is +available as $uri->un_path. The I<ldapi> scheme is used by the +OpenLDAP package. There is no real specification for it, but it is +mentioned in various OpenLDAP manual pages. + +=item B<ldaps>: + +Like the I<ldap> URI scheme, but uses an SSL connection. This +scheme is deprecated, as the preferred way is to use the I<start_tls> +mechanism. + +=item B<mailto>: + +The I<mailto> URI scheme is specified in RFC 2368. The scheme was +originally used to designate the Internet mailing address of an +individual or service. It has (in RFC 2368) been extended to allow +setting of other mail header fields and the message body. + +C<URI> objects belonging to the mailto scheme support the common +methods and the generic query methods. In addition, they support the +following mailto-specific methods: $uri->to, $uri->headers. + +Note that the "foo@example.com" part of a mailto is I<not> the +C<userinfo> and C<host> but instead the C<path>. This allows a +mailto URI to contain multiple comma separated email addresses. + +=item B<mms>: + +The I<mms> URL specification can be found at L<http://sdp.ppona.com/>. +C<URI> objects belonging to the mms scheme support the common, +generic, and server methods, with the exception of userinfo and +query-related sub-components. + +=item B<news>: + +The I<news>, I<nntp> and I<snews> URI schemes are specified in +<draft-gilman-news-url-01> and will hopefully be available as an RFC +2396 based specification soon. + +C<URI> objects belonging to the news scheme support the common, +generic and server methods. In addition, they provide some methods to +access the path: $uri->group and $uri->message. + +=item B<nntp>: + +See I<news> scheme. + +=item B<pop>: + +The I<pop> URI scheme is specified in RFC 2384. The scheme is used to +reference a POP3 mailbox. + +C<URI> objects belonging to the pop scheme support the common, generic +and server methods. In addition, they provide two methods to access the +userinfo components: $uri->user and $uri->auth + +=item B<rlogin>: + +An old specification of the I<rlogin> URI scheme is found in RFC +1738. C<URI> objects belonging to the rlogin scheme support the +common, generic and server methods. + +=item B<rtsp>: + +The I<rtsp> URL specification can be found in section 3.2 of RFC 2326. +C<URI> objects belonging to the rtsp scheme support the common, +generic, and server methods, with the exception of userinfo and +query-related sub-components. + +=item B<rtspu>: + +The I<rtspu> URI scheme is used to talk to RTSP servers over UDP +instead of TCP. The syntax is the same as rtsp. + +=item B<rsync>: + +Information about rsync is available from L<http://rsync.samba.org/>. +C<URI> objects belonging to the rsync scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B<sip>: + +The I<sip> URI specification is described in sections 19.1 and 25 +of RFC 3261. C<URI> objects belonging to the sip scheme support the +common, generic, and server methods with the exception of path related +sub-components. In addition, they provide two methods to get and set +I<sip> parameters: $uri->params_form and $uri->params. + +=item B<sips>: + +See I<sip> scheme. Its syntax is the same as sip, but the default +port is different. + +=item B<snews>: + +See I<news> scheme. Its syntax is the same as news, but the default +port is different. + +=item B<telnet>: + +An old specification of the I<telnet> URI scheme is found in RFC +1738. C<URI> objects belonging to the telnet scheme support the +common, generic and server methods. + +=item B<tn3270>: + +These URIs are used like I<telnet> URIs but for connections to IBM +mainframes. C<URI> objects belonging to the tn3270 scheme support the +common, generic and server methods. + +=item B<ssh>: + +Information about ssh is available at L<http://www.openssh.com/>. +C<URI> objects belonging to the ssh scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B<sftp>: + +C<URI> objects belonging to the sftp scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B<urn>: + +The syntax of Uniform Resource Names is specified in RFC 2141. C<URI> +objects belonging to the urn scheme provide the common methods, and also the +methods $uri->nid and $uri->nss, which return the Namespace Identifier +and the Namespace-Specific String respectively. + +The Namespace Identifier basically works like the Scheme identifier of +URIs, and further divides the URN namespace. Namespace Identifier +assignments are maintained at +L<http://www.iana.org/assignments/urn-namespaces>. + +Letter case is not significant for the Namespace Identifier. It is +always returned in lower case by the $uri->nid method. The $uri->_nid +method can be used if you want it in its original case. + +=item B<urn>:B<isbn>: + +The C<urn:isbn:> namespace contains International Standard Book +Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging +to this namespace has the following extra methods (if the +Business::ISBN module is available): $uri->isbn, +$uri->isbn_publisher_code, $uri->isbn_group_code (formerly isbn_country_code, +which is still supported by issues a deprecation warning), $uri->isbn_as_ean. + +=item B<urn>:B<oid>: + +The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is +described in RFC 3061. An object identifier consists of sequences of digits +separated by dots. A C<URI> object belonging to this namespace has an +additional method called $uri->oid that can be used to get/set the oid +value. In a list context, oid numbers are returned as separate elements. + +=back + +=head1 CONFIGURATION VARIABLES + +The following configuration variables influence how the class and its +methods behave: + +=over 4 + +=item $URI::ABS_ALLOW_RELATIVE_SCHEME + +Some older parsers used to allow the scheme name to be present in the +relative URL if it was the same as the base URL scheme. RFC 2396 says +that this should be avoided, but you can enable this old behaviour by +setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value. +The difference is demonstrated by the following examples: + + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:foo" + + local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:/host/a/foo" + + +=item $URI::ABS_REMOTE_LEADING_DOTS + +You can also have the abs() method ignore excess ".." +segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS +to a TRUE value. The difference is demonstrated by the following +examples: + + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/../../foo" + + local $URI::ABS_REMOTE_LEADING_DOTS = 1; + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/foo" + +=item $URI::DEFAULT_QUERY_FORM_DELIMITER + +This value can be set to ";" to have the query form C<key=value> pairs +delimited by ";" instead of "&" which is the default. + +=back + +=head1 BUGS + +There are some things that are not quite right: + +=over + +=item * + +Using regexp variables like $1 directly as arguments to the URI accessor methods +does not work too well with current perl implementations. I would argue +that this is actually a bug in perl. The workaround is to quote +them. Example: + + /(...)/ || die; + $u->query("$1"); + + +=item * + +The escaping (percent encoding) of chars in the 128 .. 255 range passed to the +URI constructor or when setting URI parts using the accessor methods depend on +the state of the internal UTF8 flag (see utf8::is_utf8) of the string passed. +If the UTF8 flag is set the UTF-8 encoded version of the character is percent +encoded. If the UTF8 flag isn't set the Latin-1 version (byte) of the +character is percent encoded. This basically exposes the internal encoding of +Perl strings. + +=back + +=head1 PARSING URIs WITH REGEXP + +As an alternative to this module, the following (official) regular +expression can be used to decode a URI: + + my($scheme, $authority, $path, $query, $fragment) = + $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + +The C<URI::Split> module provides the function uri_split() as a +readable alternative. + +=head1 SEE ALSO + +L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>, +L<URI::Split>, L<URI::Heuristic> + +RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax", +Berners-Lee, Fielding, Masinter, August 1998. + +L<http://www.iana.org/assignments/uri-schemes> + +L<http://www.iana.org/assignments/urn-namespaces> + +L<http://www.w3.org/Addressing/> + +=head1 COPYRIGHT + +Copyright 1995-2009 Gisle Aas. + +Copyright 1995 Martijn Koster. + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 AUTHORS / ACKNOWLEDGMENTS + +This module is based on the C<URI::URL> module, which in turn was +(distantly) based on the C<wwwurl.pl> code in the libwww-perl for +perl4 developed by Roy Fielding, as part of the Arcadia project at the +University of California, Irvine, with contributions from Brooks +Cutter. + +C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and +Martijn Koster with input from other people on the libwww-perl mailing +list. + +C<URI> and related subclasses was developed by Gisle Aas. + +=cut diff --git a/lib/URI/Escape.pm b/lib/URI/Escape.pm new file mode 100644 index 0000000..30f2b31 --- /dev/null +++ b/lib/URI/Escape.pm @@ -0,0 +1,220 @@ +package URI::Escape; + +use strict; +use warnings; + +=head1 NAME + +URI::Escape - Percent-encode and percent-decode unsafe characters + +=head1 SYNOPSIS + + use URI::Escape; + $safe = uri_escape("10% is enough\n"); + $verysafe = uri_escape("foo", "\0-\377"); + $str = uri_unescape($safe); + +=head1 DESCRIPTION + +This module provides functions to percent-encode and percent-decode URI strings as +defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping". +This is the terminology used by this module, which predates the formalization of the +terms by the RFC by several years. + +A URI consists of a restricted set of characters. The restricted set +of characters consists of digits, letters, and a few graphic symbols +chosen from those common to most of the character encodings and input +facilities available to Internet users. They are made up of the +"unreserved" and "reserved" character sets as defined in RFC 3986. + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@" + "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + +In addition, any byte (octet) can be represented in a URI by an escape +sequence: a triplet consisting of the character "%" followed by two +hexadecimal digits. A byte can also be represented directly by a +character, using the US-ASCII character for that octet. + +Some of the characters are I<reserved> for use as delimiters or as +part of certain URI components. These must be escaped if they are to +be treated as ordinary data. Read RFC 3986 for further details. + +The functions provided (and exported by default) from this module are: + +=over 4 + +=item uri_escape( $string ) + +=item uri_escape( $string, $unsafe ) + +Replaces each unsafe character in the $string with the corresponding +escape sequence and returns the result. The $string argument should +be a string of bytes. The uri_escape() function will croak if given a +characters with code above 255. Use uri_escape_utf8() if you know you +have such chars or/and want chars in the 128 .. 255 range treated as +UTF-8. + +The uri_escape() function takes an optional second argument that +overrides the set of characters that are to be escaped. The set is +specified as a string that can be used in a regular expression +character class (between [ ]). E.g.: + + "\x00-\x1f\x7f-\xff" # all control and hi-bit characters + "a-z" # all lower case characters + "^A-Za-z" # everything not a letter + +The default set of characters to be escaped is all those which are +I<not> part of the C<unreserved> character class shown above as well +as the reserved characters. I.e. the default is: + + "^A-Za-z0-9\-\._~" + +=item uri_escape_utf8( $string ) + +=item uri_escape_utf8( $string, $unsafe ) + +Works like uri_escape(), but will encode chars as UTF-8 before +escaping them. This makes this function able to deal with characters +with code above 255 in $string. Note that chars in the 128 .. 255 +range will be escaped differently by this function compared to what +uri_escape() would. For chars in the 0 .. 127 range there is no +difference. + +Equivalent to: + + utf8::encode($string); + my $uri = uri_escape($string); + +Note: JavaScript has a function called escape() that produces the +sequence "%uXXXX" for chars in the 256 .. 65535 range. This function +has really nothing to do with URI escaping but some folks got confused +since it "does the right thing" in the 0 .. 255 range. Because of +this you sometimes see "URIs" with these kind of escapes. The +JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). + +=item uri_unescape($string,...) + +Returns a string with each %XX sequence replaced with the actual byte +(octet). + +This does the same as: + + $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + +but does not modify the string in-place as this RE would. Using the +uri_unescape() function instead of the RE might make the code look +cleaner and is a few characters less to type. + +In a simple benchmark test I did, +calling the function (instead of the inline RE above) if a few chars +were unescaped was something like 40% slower, and something like 700% slower if none were. If +you are going to unescape a lot of times it might be a good idea to +inline the RE. + +If the uri_unescape() function is passed multiple strings, then each +one is returned unescaped. + +=back + +The module can also export the C<%escapes> hash, which contains the +mapping from all 256 bytes to the corresponding escape codes. Lookup +in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))> +each time. + +=head1 SEE ALSO + +L<URI> + + +=head1 COPYRIGHT + +Copyright 1995-2004 Gisle Aas. + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use Exporter 5.57 'import'; +our %escapes; +our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); +our @EXPORT_OK = qw(%escapes); +our $VERSION = "3.31"; + +use Carp (); + +# Build a char->hex map +for (0..255) { + $escapes{chr($_)} = sprintf("%%%02X", $_); +} + +my %subst; # compiled patterns + +my %Unsafe = ( + RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/, + RFC3986 => qr/[^A-Za-z0-9\-\._~]/, +); + +sub uri_escape { + my($text, $patn) = @_; + return undef unless defined $text; + if (defined $patn){ + unless (exists $subst{$patn}) { + # Because we can't compile the regex we fake it with a cached sub + (my $tmp = $patn) =~ s,/,\\/,g; + eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }"; + Carp::croak("uri_escape: $@") if $@; + } + &{$subst{$patn}}($text); + } else { + $text =~ s/($Unsafe{RFC3986})/$escapes{$1} || _fail_hi($1)/ge; + } + $text; +} + +sub _fail_hi { + my $chr = shift; + Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr)); +} + +sub uri_escape_utf8 { + my $text = shift; + utf8::encode($text); + return uri_escape($text, @_); +} + +sub uri_unescape { + # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + for (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format. +sub escape_char { + # Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1). + # The following forces a fetch to occur beforehand. + my $dummy = substr($_[0], 0, 0); + + if (utf8::is_utf8($_[0])) { + my $s = shift; + utf8::encode($s); + unshift(@_, $s); + } + + return join '', @URI::Escape::escapes{split //, $_[0]}; +} + +1; diff --git a/lib/URI/Heuristic.pm b/lib/URI/Heuristic.pm new file mode 100644 index 0000000..d4ace34 --- /dev/null +++ b/lib/URI/Heuristic.pm @@ -0,0 +1,253 @@ +package URI::Heuristic; + +=head1 NAME + +URI::Heuristic - Expand URI using heuristics + +=head1 SYNOPSIS + + use URI::Heuristic qw(uf_uristr); + $u = uf_uristr("perl"); # http://www.perl.com + $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol + $u = uf_uristr("aas"); # http://www.aas.no + $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi + $u = uf_uristr("/etc/passwd"); # file:/etc/passwd + +=head1 DESCRIPTION + +This module provides functions that expand strings into real absolute +URIs using some built-in heuristics. Strings that already represent +absolute URIs (i.e. that start with a C<scheme:> part) are never modified +and are returned unchanged. The main use of these functions is to +allow abbreviated URIs similar to what many web browsers allow for URIs +typed in by the user. + +The following functions are provided: + +=over 4 + +=item uf_uristr($str) + +Tries to make the argument string +into a proper absolute URI string. The "uf_" prefix stands for "User +Friendly". Under MacOS, it assumes that any string with a common URL +scheme (http, ftp, etc.) is a URL rather than a local path. So don't name +your volumes after common URL schemes and expect uf_uristr() to construct +valid file: URL's on those volumes for you, because it won't. + +=item uf_uri($str) + +Works the same way as uf_uristr() but +returns a C<URI> object. + +=back + +=head1 ENVIRONMENT + +If the hostname portion of a URI does not contain any dots, then +certain qualified guesses are made. These guesses are governed by +the following environment variables: + +=over 10 + +=item COUNTRY + +The two-letter country code (ISO 3166) for your location. If +the domain name of your host ends with two letters, then it is taken +to be the default country. See also L<Locale::Country>. + +=item HTTP_ACCEPT_LANGUAGE, LC_ALL, LANG + +If COUNTRY is not set, these standard environment variables are +examined and country (not language) information possibly found in them +is used as the default country. + +=item URL_GUESS_PATTERN + +Contains a space-separated list of URL patterns to try. The string +"ACME" is for some reason used as a placeholder for the host name in +the URL provided. Example: + + URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com" + export URL_GUESS_PATTERN + +Specifying URL_GUESS_PATTERN disables any guessing rules based on +country. An empty URL_GUESS_PATTERN disables any guessing that +involves host name lookups. + +=back + +=head1 COPYRIGHT + +Copyright 1997-1998, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut + +use strict; +use warnings; + +use Exporter 5.57 'import'; +our @EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr); +our $VERSION = "4.20"; + +our ($MY_COUNTRY, $DEBUG); + +sub MY_COUNTRY() { + for ($MY_COUNTRY) { + return $_ if defined; + + # First try the environment. + $_ = $ENV{COUNTRY}; + return $_ if defined; + + # Try the country part of LC_ALL and LANG from environment + my @srcs = ($ENV{LC_ALL}, $ENV{LANG}); + # ...and HTTP_ACCEPT_LANGUAGE before those if present + if (my $httplang = $ENV{HTTP_ACCEPT_LANGUAGE}) { + # TODO: q-value processing/ordering + for $httplang (split(/\s*,\s*/, $httplang)) { + if ($httplang =~ /^\s*([a-zA-Z]+)[_-]([a-zA-Z]{2})\s*$/) { + unshift(@srcs, "${1}_${2}"); + last; + } + } + } + for (@srcs) { + next unless defined; + return lc($1) if /^[a-zA-Z]+_([a-zA-Z]{2})(?:[.@]|$)/; + } + + # Last bit of domain name. This may access the network. + require Net::Domain; + my $fqdn = Net::Domain::hostfqdn(); + $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/; + return $_ if defined; + + # Give up. Defined but false. + return ($_ = 0); + } +} + +our %LOCAL_GUESSING = +( + 'us' => [qw(www.ACME.gov www.ACME.mil)], + 'gb' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)], + 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)], + 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)], + # send corrections and new entries to <gisle@aas.no> +); +# Backwards compatibility; uk != United Kingdom in ISO 3166 +$LOCAL_GUESSING{uk} = $LOCAL_GUESSING{gb}; + + +sub uf_uristr ($) +{ + local($_) = @_; + print STDERR "uf_uristr: resolving $_\n" if $DEBUG; + return unless defined; + + s/^\s+//; + s/\s+$//; + + if (/^(www|web|home)[a-z0-9-]*(?:\.|$)/i) { + $_ = "http://$_"; + + } elsif (/^(ftp|gopher|news|wais|https|http)[a-z0-9-]*(?:\.|$)/i) { + $_ = lc($1) . "://$_"; + + } elsif ($^O ne "MacOS" && + (m,^/, || # absolute file name + m,^\.\.?/, || # relative file name + m,^[a-zA-Z]:[/\\],) # dosish file name + ) + { + $_ = "file:$_"; + + } elsif ($^O eq "MacOS" && m/:/) { + # potential MacOS file name + unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) { + require URI::file; + my $a = URI::file->new($_)->as_string; + $_ = ($a =~ m/^file:/) ? $a : "file:$a"; + } + } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) { + $_ = "mailto:$_"; + + } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified + if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) { + my $host = $1; + + my $scheme = "http"; + if (/^:(\d+)\b/) { + # Some more or less well known ports + if ($1 =~ /^[56789]?443$/) { + $scheme = "https"; + } elsif ($1 eq "21") { + $scheme = "ftp"; + } + } + + if ($host !~ /\./ && $host ne "localhost") { + my @guess; + if (exists $ENV{URL_GUESS_PATTERN}) { + @guess = map { s/\bACME\b/$host/; $_ } + split(' ', $ENV{URL_GUESS_PATTERN}); + } else { + if (MY_COUNTRY()) { + my $special = $LOCAL_GUESSING{MY_COUNTRY()}; + if ($special) { + my @special = @$special; + push(@guess, map { s/\bACME\b/$host/; $_ } + @special); + } else { + push(@guess, "www.$host." . MY_COUNTRY()); + } + } + push(@guess, map "www.$host.$_", + "com", "org", "net", "edu", "int"); + } + + + my $guess; + for $guess (@guess) { + print STDERR "uf_uristr: gethostbyname('$guess.')..." + if $DEBUG; + if (gethostbyname("$guess.")) { + print STDERR "yes\n" if $DEBUG; + $host = $guess; + last; + } + print STDERR "no\n" if $DEBUG; + } + } + $_ = "$scheme://$host$_"; + + } else { + # pure junk, just return it unchanged... + + } + } + print STDERR "uf_uristr: ==> $_\n" if $DEBUG; + + $_; +} + +sub uf_uri ($) +{ + require URI; + URI->new(uf_uristr($_[0])); +} + +# legacy +*uf_urlstr = \*uf_uristr; + +sub uf_url ($) +{ + require URI::URL; + URI::URL->new(uf_uristr($_[0])); +} + +1; diff --git a/lib/URI/IRI.pm b/lib/URI/IRI.pm new file mode 100644 index 0000000..8906399 --- /dev/null +++ b/lib/URI/IRI.pm @@ -0,0 +1,47 @@ +package URI::IRI; + +# Experimental + +use strict; +use warnings; +use URI (); + +use overload '""' => sub { shift->as_string }; + +our $VERSION = "1.69"; + +sub new { + my($class, $uri, $scheme) = @_; + utf8::upgrade($uri); + return bless { + uri => URI->new($uri, $scheme), + }, $class; +} + +sub clone { + my $self = shift; + return bless { + uri => $self->{uri}->clone, + }, ref($self); +} + +sub as_string { + my $self = shift; + return $self->{uri}->as_iri; +} + +our $AUTOLOAD; +sub AUTOLOAD +{ + my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2); + + # We create the function here so that it will not need to be + # autoloaded the next time. + no strict 'refs'; + *$method = sub { shift->{uri}->$method(@_) }; + goto &$method; +} + +sub DESTROY {} # avoid AUTOLOADing it + +1; diff --git a/lib/URI/QueryParam.pm b/lib/URI/QueryParam.pm new file mode 100644 index 0000000..7866f79 --- /dev/null +++ b/lib/URI/QueryParam.pm @@ -0,0 +1,207 @@ +package URI::QueryParam; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +sub URI::_query::query_param { + my $self = shift; + my @old = $self->query_form; + + if (@_ == 0) { + # get keys + my (%seen, $i); + return grep !($i++ % 2 || $seen{$_}++), @old; + } + + my $key = shift; + my @i = grep $_ % 2 == 0 && $old[$_] eq $key, 0 .. $#old; + + if (@_) { + my @new = @old; + my @new_i = @i; + my @vals = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_; + + while (@new_i > @vals) { + splice @new, pop @new_i, 2; + } + if (@vals > @new_i) { + my $i = @new_i ? $new_i[-1] + 2 : @new; + my @splice = splice @vals, @new_i, @vals - @new_i; + + splice @new, $i, 0, map { $key => $_ } @splice; + } + if (@vals) { + #print "SET $new_i[0]\n"; + @new[ map $_ + 1, @new_i ] = @vals; + } + + $self->query_form(\@new); + } + + return wantarray ? @old[map $_+1, @i] : @i ? $old[$i[0]+1] : undef; +} + +sub URI::_query::query_param_append { + my $self = shift; + my $key = shift; + my @vals = map { ref $_ eq 'ARRAY' ? @$_ : $_ } @_; + $self->query_form($self->query_form, $key => \@vals); # XXX + return; +} + +sub URI::_query::query_param_delete { + my $self = shift; + my $key = shift; + my @old = $self->query_form; + my @vals; + + for (my $i = @old - 2; $i >= 0; $i -= 2) { + next if $old[$i] ne $key; + push(@vals, (splice(@old, $i, 2))[1]); + } + $self->query_form(\@old) if @vals; + return wantarray ? reverse @vals : $vals[-1]; +} + +sub URI::_query::query_form_hash { + my $self = shift; + my @old = $self->query_form; + if (@_) { + $self->query_form(@_ == 1 ? %{shift(@_)} : @_); + } + my %hash; + while (my($k, $v) = splice(@old, 0, 2)) { + if (exists $hash{$k}) { + for ($hash{$k}) { + $_ = [$_] unless ref($_) eq "ARRAY"; + push(@$_, $v); + } + } + else { + $hash{$k} = $v; + } + } + return \%hash; +} + +1; + +__END__ + +=head1 NAME + +URI::QueryParam - Additional query methods for URIs + +=head1 SYNOPSIS + + use URI; + use URI::QueryParam; + + $u = URI->new("", "http"); + $u->query_param(foo => 1, 2, 3); + print $u->query; # prints foo=1&foo=2&foo=3 + + for my $key ($u->query_param) { + print "$key: ", join(", ", $u->query_param($key)), "\n"; + } + +=head1 DESCRIPTION + +Loading the C<URI::QueryParam> module adds some extra methods to +URIs that support query methods. These methods provide an alternative +interface to the $u->query_form data. + +The query_param_* methods have deliberately been made identical to the +interface of the corresponding C<CGI.pm> methods. + +The following additional methods are made available: + +=over + +=item @keys = $u->query_param + +=item @values = $u->query_param( $key ) + +=item $first_value = $u->query_param( $key ) + +=item $u->query_param( $key, $value,... ) + +If $u->query_param is called with no arguments, it returns all the +distinct parameter keys of the URI. In a scalar context it returns the +number of distinct keys. + +When a $key argument is given, the method returns the parameter values with the +given key. In a scalar context, only the first parameter value is +returned. + +If additional arguments are given, they are used to update successive +parameters with the given key. If any of the values provided are +array references, then the array is dereferenced to get the actual +values. + +Please note that you can supply multiple values to this method, but you cannot +supply multiple keys. + +Do this: + + $uri->query_param( widget_id => 1, 5, 9 ); + +Do NOT do this: + + $uri->query_param( widget_id => 1, frobnicator_id => 99 ); + +=item $u->query_param_append($key, $value,...) + +Adds new parameters with the given +key without touching any old parameters with the same key. It +can be explained as a more efficient version of: + + $u->query_param($key, + $u->query_param($key), + $value,...); + +One difference is that this expression would return the old values +of $key, whereas the query_param_append() method does not. + +=item @values = $u->query_param_delete($key) + +=item $first_value = $u->query_param_delete($key) + +Deletes all key/value pairs with the given key. +The old values are returned. In a scalar context, only the first value +is returned. + +Using the query_param_delete() method is slightly more efficient than +the equivalent: + + $u->query_param($key, []); + +=item $hashref = $u->query_form_hash + +=item $u->query_form_hash( \%new_form ) + +Returns a reference to a hash that represents the +query form's key/value pairs. If a key occurs multiple times, then the hash +value becomes an array reference. + +Note that sequence information is lost. This means that: + + $u->query_form_hash($u->query_form_hash); + +is not necessarily a no-op, as it may reorder the key/value pairs. +The values returned by the query_param() method should stay the same +though. + +=back + +=head1 SEE ALSO + +L<URI>, L<CGI> + +=head1 COPYRIGHT + +Copyright 2002 Gisle Aas. + +=cut diff --git a/lib/URI/Split.pm b/lib/URI/Split.pm new file mode 100644 index 0000000..6762b3e --- /dev/null +++ b/lib/URI/Split.pm @@ -0,0 +1,97 @@ +package URI::Split; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use Exporter 5.57 'import'; +our @EXPORT_OK = qw(uri_split uri_join); + +use URI::Escape (); + +sub uri_split { + return $_[0] =~ m,(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?,; +} + +sub uri_join { + my($scheme, $auth, $path, $query, $frag) = @_; + my $uri = defined($scheme) ? "$scheme:" : ""; + $path = "" unless defined $path; + if (defined $auth) { + $auth =~ s,([/?\#]), URI::Escape::escape_char($1),eg; + $uri .= "//$auth"; + $path = "/$path" if length($path) && $path !~ m,^/,; + } + elsif ($path =~ m,^//,) { + $uri .= "//"; # XXX force empty auth + } + unless (length $uri) { + $path =~ s,(:), URI::Escape::escape_char($1),e while $path =~ m,^[^:/?\#]+:,; + } + $path =~ s,([?\#]), URI::Escape::escape_char($1),eg; + $uri .= $path; + if (defined $query) { + $query =~ s,(\#), URI::Escape::escape_char($1),eg; + $uri .= "?$query"; + } + $uri .= "#$frag" if defined $frag; + $uri; +} + +1; + +__END__ + +=head1 NAME + +URI::Split - Parse and compose URI strings + +=head1 SYNOPSIS + + use URI::Split qw(uri_split uri_join); + ($scheme, $auth, $path, $query, $frag) = uri_split($uri); + $uri = uri_join($scheme, $auth, $path, $query, $frag); + +=head1 DESCRIPTION + +Provides functions to parse and compose URI +strings. The following functions are provided: + +=over + +=item ($scheme, $auth, $path, $query, $frag) = uri_split($uri) + +Breaks up a URI string into its component +parts. An C<undef> value is returned for those parts that are not +present. The $path part is always present (but can be the empty +string) and is thus never returned as C<undef>. + +No sensible value is returned if this function is called in a scalar +context. + +=item $uri = uri_join($scheme, $auth, $path, $query, $frag) + +Puts together a URI string from its parts. +Missing parts are signaled by passing C<undef> for the corresponding +argument. + +Minimal escaping is applied to parts that contain reserved chars +that would confuse a parser. For instance, any occurrence of '?' or '#' +in $path is always escaped, as it would otherwise be parsed back +as a query or fragment. + +=back + +=head1 SEE ALSO + +L<URI>, L<URI::Escape> + +=head1 COPYRIGHT + +Copyright 2003, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/URI/URL.pm b/lib/URI/URL.pm new file mode 100644 index 0000000..ba05eca --- /dev/null +++ b/lib/URI/URL.pm @@ -0,0 +1,303 @@ +package URI::URL; + +use strict; +use warnings; + +use parent 'URI::WithBase'; + +our $VERSION = "5.04"; + +# Provide as much as possible of the old URI::URL interface for backwards +# compatibility... + +use Exporter 5.57 'import'; +our @EXPORT = qw(url); + +# Easy to use constructor +sub url ($;$) { URI::URL->new(@_); } + +use URI::Escape qw(uri_unescape); + +sub new +{ + my $class = shift; + my $self = $class->SUPER::new(@_); + $self->[0] = $self->[0]->canonical; + $self; +} + +sub newlocal +{ + my $class = shift; + require URI::file; + bless [URI::file->new_abs(shift)], $class; +} + +{package URI::_foreign; + sub _init # hope it is not defined + { + my $class = shift; + die "Unknown URI::URL scheme $_[1]:" if $URI::URL::STRICT; + $class->SUPER::_init(@_); + } +} + +sub strict +{ + my $old = $URI::URL::STRICT; + $URI::URL::STRICT = shift if @_; + $old; +} + +sub print_on +{ + my $self = shift; + require Data::Dumper; + print STDERR Data::Dumper::Dumper($self); +} + +sub _try +{ + my $self = shift; + my $method = shift; + scalar(eval { $self->$method(@_) }); +} + +sub crack +{ + # should be overridden by subclasses + my $self = shift; + (scalar($self->scheme), + $self->_try("user"), + $self->_try("password"), + $self->_try("host"), + $self->_try("port"), + $self->_try("path"), + $self->_try("params"), + $self->_try("query"), + scalar($self->fragment), + ) +} + +sub full_path +{ + my $self = shift; + my $path = $self->path_query; + $path = "/" unless length $path; + $path; +} + +sub netloc +{ + shift->authority(@_); +} + +sub epath +{ + my $path = shift->SUPER::path(@_); + $path =~ s/;.*//; + $path; +} + +sub eparams +{ + my $self = shift; + my @p = $self->path_segments; + return undef unless ref($p[-1]); + @p = @{$p[-1]}; + shift @p; + join(";", @p); +} + +sub params { shift->eparams(@_); } + +sub path { + my $self = shift; + my $old = $self->epath(@_); + return unless defined wantarray; + return '/' if !defined($old) || !length($old); + Carp::croak("Path components contain '/' (you must call epath)") + if $old =~ /%2[fF]/ and !@_; + $old = "/$old" if $old !~ m|^/| && defined $self->netloc; + return uri_unescape($old); +} + +sub path_components { + shift->path_segments(@_); +} + +sub query { + my $self = shift; + my $old = $self->equery(@_); + if (defined(wantarray) && defined($old)) { + if ($old =~ /%(?:26|2[bB]|3[dD])/) { # contains escaped '=' '&' or '+' + my $mess; + for ($old) { + $mess = "Query contains both '+' and '%2B'" + if /\+/ && /%2[bB]/; + $mess = "Form query contains escaped '=' or '&'" + if /=/ && /%(?:3[dD]|26)/; + } + if ($mess) { + Carp::croak("$mess (you must call equery)"); + } + } + # Now it should be safe to unescape the string without losing + # information + return uri_unescape($old); + } + undef; + +} + +sub abs +{ + my $self = shift; + my $base = shift; + my $allow_scheme = shift; + $allow_scheme = $URI::URL::ABS_ALLOW_RELATIVE_SCHEME + unless defined $allow_scheme; + local $URI::ABS_ALLOW_RELATIVE_SCHEME = $allow_scheme; + local $URI::ABS_REMOTE_LEADING_DOTS = $URI::URL::ABS_REMOTE_LEADING_DOTS; + $self->SUPER::abs($base); +} + +sub frag { shift->fragment(@_); } +sub keywords { shift->query_keywords(@_); } + +# file: +sub local_path { shift->file; } +sub unix_path { shift->file("unix"); } +sub dos_path { shift->file("dos"); } +sub mac_path { shift->file("mac"); } +sub vms_path { shift->file("vms"); } + +# mailto: +sub address { shift->to(@_); } +sub encoded822addr { shift->to(@_); } +sub URI::mailto::authority { shift->to(@_); } # make 'netloc' method work + +# news: +sub groupart { shift->_group(@_); } +sub article { shift->message(@_); } + +1; + +__END__ + +=head1 NAME + +URI::URL - Uniform Resource Locators + +=head1 SYNOPSIS + + $u1 = URI::URL->new($str, $base); + $u2 = $u1->abs; + +=head1 DESCRIPTION + +This module is provided for backwards compatibility with modules that +depend on the interface provided by the C<URI::URL> class that used to +be distributed with the libwww-perl library. + +The following differences exist compared to the C<URI> class interface: + +=over 3 + +=item * + +The URI::URL module exports the url() function as an alternate +constructor interface. + +=item * + +The constructor takes an optional $base argument. The C<URI::URL> +class is a subclass of C<URI::WithBase>. + +=item * + +The URI::URL->newlocal class method is the same as URI::file->new_abs. + +=item * + +URI::URL::strict(1) + +=item * + +$url->print_on method + +=item * + +$url->crack method + +=item * + +$url->full_path: same as ($uri->abs_path || "/") + +=item * + +$url->netloc: same as $uri->authority + +=item * + +$url->epath, $url->equery: same as $uri->path, $uri->query + +=item * + +$url->path and $url->query pass unescaped strings. + +=item * + +$url->path_components: same as $uri->path_segments (if you don't +consider path segment parameters) + +=item * + +$url->params and $url->eparams methods + +=item * + +$url->base method. See L<URI::WithBase>. + +=item * + +$url->abs and $url->rel have an optional $base argument. See +L<URI::WithBase>. + +=item * + +$url->frag: same as $uri->fragment + +=item * + +$url->keywords: same as $uri->query_keywords + +=item * + +$url->localpath and friends map to $uri->file. + +=item * + +$url->address and $url->encoded822addr: same as $uri->to for mailto URI + +=item * + +$url->groupart method for news URI + +=item * + +$url->article: same as $uri->message + +=back + + + +=head1 SEE ALSO + +L<URI>, L<URI::WithBase> + +=head1 COPYRIGHT + +Copyright 1998-2000 Gisle Aas. + +=cut diff --git a/lib/URI/WithBase.pm b/lib/URI/WithBase.pm new file mode 100644 index 0000000..943b7b5 --- /dev/null +++ b/lib/URI/WithBase.pm @@ -0,0 +1,174 @@ +package URI::WithBase; + +use strict; +use warnings; + +use URI; +use Scalar::Util 'blessed'; + +our $VERSION = "2.20"; + +use overload '""' => "as_string", fallback => 1; + +sub as_string; # help overload find it + +sub new +{ + my($class, $uri, $base) = @_; + my $ibase = $base; + if ($base && blessed($base) && $base->isa(__PACKAGE__)) { + $base = $base->abs; + $ibase = $base->[0]; + } + bless [URI->new($uri, $ibase), $base], $class; +} + +sub new_abs +{ + my $class = shift; + my $self = $class->new(@_); + $self->abs; +} + +sub _init +{ + my $class = shift; + my($str, $scheme) = @_; + bless [URI->new($str, $scheme), undef], $class; +} + +sub eq +{ + my($self, $other) = @_; + $other = $other->[0] if blessed($other) and $other->isa(__PACKAGE__); + $self->[0]->eq($other); +} + +our $AUTOLOAD; +sub AUTOLOAD +{ + my $self = shift; + my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2); + return if $method eq "DESTROY"; + $self->[0]->$method(@_); +} + +sub can { # override UNIVERSAL::can + my $self = shift; + $self->SUPER::can(@_) || ( + ref($self) + ? $self->[0]->can(@_) + : undef + ) +} + +sub base { + my $self = shift; + my $base = $self->[1]; + + if (@_) { # set + my $new_base = shift; + # ensure absoluteness + $new_base = $new_base->abs if ref($new_base) && $new_base->isa(__PACKAGE__); + $self->[1] = $new_base; + } + return unless defined wantarray; + + # The base attribute supports 'lazy' conversion from URL strings + # to URL objects. Strings may be stored but when a string is + # fetched it will automatically be converted to a URL object. + # The main benefit is to make it much cheaper to say: + # URI::WithBase->new($random_url_string, 'http:') + if (defined($base) && !ref($base)) { + $base = ref($self)->new($base); + $self->[1] = $base unless @_; + } + $base; +} + +sub clone +{ + my $self = shift; + my $base = $self->[1]; + $base = $base->clone if ref($base); + bless [$self->[0]->clone, $base], ref($self); +} + +sub abs +{ + my $self = shift; + my $base = shift || $self->base || return $self->clone; + $base = $base->as_string if ref($base); + bless [$self->[0]->abs($base, @_), $base], ref($self); +} + +sub rel +{ + my $self = shift; + my $base = shift || $self->base || return $self->clone; + $base = $base->as_string if ref($base); + bless [$self->[0]->rel($base, @_), $base], ref($self); +} + +1; + +__END__ + +=head1 NAME + +URI::WithBase - URIs which remember their base + +=head1 SYNOPSIS + + $u1 = URI::WithBase->new($str, $base); + $u2 = $u1->abs; + + $base = $u1->base; + $u1->base( $new_base ) + +=head1 DESCRIPTION + +This module provides the C<URI::WithBase> class. Objects of this class +are like C<URI> objects, but can keep their base too. The base +represents the context where this URI was found and can be used to +absolutize or relativize the URI. All the methods described in L<URI> +are supported for C<URI::WithBase> objects. + +The methods provided in addition to or modified from those of C<URI> are: + +=over 4 + +=item $uri = URI::WithBase->new($str, [$base]) + +The constructor takes an optional base URI as the second argument. +If provided, this argument initializes the base attribute. + +=item $uri->base( [$new_base] ) + +Can be used to get or set the value of the base attribute. +The return value, which is the old value, is a URI object or C<undef>. + +=item $uri->abs( [$base_uri] ) + +The $base_uri argument is now made optional as the object carries its +base with it. A new object is returned even if $uri is already +absolute (while plain URI objects simply return themselves in +that case). + +=item $uri->rel( [$base_uri] ) + +The $base_uri argument is now made optional as the object carries its +base with it. A new object is always returned. + +=back + + +=head1 SEE ALSO + +L<URI> + +=head1 COPYRIGHT + +Copyright 1998-2002 Gisle Aas. + +=cut diff --git a/lib/URI/_foreign.pm b/lib/URI/_foreign.pm new file mode 100644 index 0000000..cad5c8d --- /dev/null +++ b/lib/URI/_foreign.pm @@ -0,0 +1,10 @@ +package URI::_foreign; + +use strict; +use warnings; + +use parent 'URI::_generic'; + +our $VERSION = "1.69"; + +1; diff --git a/lib/URI/_generic.pm b/lib/URI/_generic.pm new file mode 100644 index 0000000..42f6609 --- /dev/null +++ b/lib/URI/_generic.pm @@ -0,0 +1,256 @@ +package URI::_generic; + +use strict; +use warnings; + +use parent qw(URI URI::_query); + +use URI::Escape qw(uri_unescape); +use Carp (); + +our $VERSION = "1.69"; + +my $ACHAR = $URI::uric; $ACHAR =~ s,\\[/?],,g; +my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g; + +sub _no_scheme_ok { 1 } + +sub authority +{ + my $self = shift; + $$self =~ m,^((?:$URI::scheme_re:)?)(?://([^/?\#]*))?(.*)$,os or die; + + if (@_) { + my $auth = shift; + $$self = $1; + my $rest = $3; + if (defined $auth) { + $auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($auth); + $$self .= "//$auth"; + } + _check_path($rest, $$self); + $$self .= $rest; + } + $2; +} + +sub path +{ + my $self = shift; + $$self =~ m,^((?:[^:/?\#]+:)?(?://[^/?\#]*)?)([^?\#]*)(.*)$,s or die; + + if (@_) { + $$self = $1; + my $rest = $3; + my $new_path = shift; + $new_path = "" unless defined $new_path; + $new_path =~ s/([^$PCHAR])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_path); + _check_path($new_path, $$self); + $$self .= $new_path . $rest; + } + $2; +} + +sub path_query +{ + my $self = shift; + $$self =~ m,^((?:[^:/?\#]+:)?(?://[^/?\#]*)?)([^\#]*)(.*)$,s or die; + + if (@_) { + $$self = $1; + my $rest = $3; + my $new_path = shift; + $new_path = "" unless defined $new_path; + $new_path =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_path); + _check_path($new_path, $$self); + $$self .= $new_path . $rest; + } + $2; +} + +sub _check_path +{ + my($path, $pre) = @_; + my $prefix; + if ($pre =~ m,/,) { # authority present + $prefix = "/" if length($path) && $path !~ m,^[/?\#],; + } + else { + if ($path =~ m,^//,) { + Carp::carp("Path starting with double slash is confusing") + if $^W; + } + elsif (!length($pre) && $path =~ m,^[^:/?\#]+:,) { + Carp::carp("Path might look like scheme, './' prepended") + if $^W; + $prefix = "./"; + } + } + substr($_[0], 0, 0) = $prefix if defined $prefix; +} + +sub path_segments +{ + my $self = shift; + my $path = $self->path; + if (@_) { + my @arg = @_; # make a copy + for (@arg) { + if (ref($_)) { + my @seg = @$_; + $seg[0] =~ s/%/%25/g; + for (@seg) { s/;/%3B/g; } + $_ = join(";", @seg); + } + else { + s/%/%25/g; s/;/%3B/g; + } + s,/,%2F,g; + } + $self->path(join("/", @arg)); + } + return $path unless wantarray; + map {/;/ ? $self->_split_segment($_) + : uri_unescape($_) } + split('/', $path, -1); +} + + +sub _split_segment +{ + my $self = shift; + require URI::_segment; + URI::_segment->new(@_); +} + + +sub abs +{ + my $self = shift; + my $base = shift || Carp::croak("Missing base argument"); + + if (my $scheme = $self->scheme) { + return $self unless $URI::ABS_ALLOW_RELATIVE_SCHEME; + $base = URI->new($base) unless ref $base; + return $self unless $scheme eq $base->scheme; + } + + $base = URI->new($base) unless ref $base; + my $abs = $self->clone; + $abs->scheme($base->scheme); + return $abs if $$self =~ m,^(?:$URI::scheme_re:)?//,o; + $abs->authority($base->authority); + + my $path = $self->path; + return $abs if $path =~ m,^/,; + + if (!length($path)) { + my $abs = $base->clone; + my $query = $self->query; + $abs->query($query) if defined $query; + my $fragment = $self->fragment; + $abs->fragment($fragment) if defined $fragment; + return $abs; + } + + my $p = $base->path; + $p =~ s,[^/]+$,,; + $p .= $path; + my @p = split('/', $p, -1); + shift(@p) if @p && !length($p[0]); + my $i = 1; + while ($i < @p) { + #print "$i ", join("/", @p), " ($p[$i])\n"; + if ($p[$i-1] eq ".") { + splice(@p, $i-1, 1); + $i-- if $i > 1; + } + elsif ($p[$i] eq ".." && $p[$i-1] ne "..") { + splice(@p, $i-1, 2); + if ($i > 1) { + $i--; + push(@p, "") if $i == @p; + } + } + else { + $i++; + } + } + $p[-1] = "" if @p && $p[-1] eq "."; # trailing "/." + if ($URI::ABS_REMOTE_LEADING_DOTS) { + shift @p while @p && $p[0] =~ /^\.\.?$/; + } + $abs->path("/" . join("/", @p)); + $abs; +} + +# The opposite of $url->abs. Return a URI which is as relative as possible +sub rel { + my $self = shift; + my $base = shift || Carp::croak("Missing base argument"); + my $rel = $self->clone; + $base = URI->new($base) unless ref $base; + + #my($scheme, $auth, $path) = @{$rel}{qw(scheme authority path)}; + my $scheme = $rel->scheme; + my $auth = $rel->canonical->authority; + my $path = $rel->path; + + if (!defined($scheme) && !defined($auth)) { + # it is already relative + return $rel; + } + + #my($bscheme, $bauth, $bpath) = @{$base}{qw(scheme authority path)}; + my $bscheme = $base->scheme; + my $bauth = $base->canonical->authority; + my $bpath = $base->path; + + for ($bscheme, $bauth, $auth) { + $_ = '' unless defined + } + + unless ($scheme eq $bscheme && $auth eq $bauth) { + # different location, can't make it relative + return $rel; + } + + for ($path, $bpath) { $_ = "/$_" unless m,^/,; } + + # Make it relative by eliminating scheme and authority + $rel->scheme(undef); + $rel->authority(undef); + + # This loop is based on code from Nicolai Langfeldt <janl@ifi.uio.no>. + # First we calculate common initial path components length ($li). + my $li = 1; + while (1) { + my $i = index($path, '/', $li); + last if $i < 0 || + $i != index($bpath, '/', $li) || + substr($path,$li,$i-$li) ne substr($bpath,$li,$i-$li); + $li=$i+1; + } + # then we nuke it from both paths + substr($path, 0,$li) = ''; + substr($bpath,0,$li) = ''; + + if ($path eq $bpath && + defined($rel->fragment) && + !defined($rel->query)) { + $rel->path(""); + } + else { + # Add one "../" for each path component left in the base path + $path = ('../' x $bpath =~ tr|/|/|) . $path; + $path = "./" if $path eq ""; + $rel->path($path); + } + + $rel; +} + +1; diff --git a/lib/URI/_idna.pm b/lib/URI/_idna.pm new file mode 100644 index 0000000..ce58db8 --- /dev/null +++ b/lib/URI/_idna.pm @@ -0,0 +1,91 @@ +package URI::_idna; + +# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) +# based on Python-2.6.4/Lib/encodings/idna.py + +use strict; +use warnings; + +use URI::_punycode qw(encode_punycode decode_punycode); +use Carp qw(croak); + +our $VERSION = "1.69"; + +BEGIN { + *URI::_idna::_ENV_::JOIN_LEAKS_UTF8_FLAGS = $] < 5.008_003 + ? sub () { 1 } + : sub () { 0 } + ; +} + +my $ASCII = qr/^[\x00-\x7F]*\z/; + +sub encode { + my $idomain = shift; + my @labels = split(/\./, $idomain, -1); + my @last_empty; + push(@last_empty, pop @labels) if @labels > 1 && $labels[-1] eq ""; + for (@labels) { + $_ = ToASCII($_); + } + + return eval 'join(".", @labels, @last_empty)' if URI::_idna::_ENV_::JOIN_LEAKS_UTF8_FLAGS; + return join(".", @labels, @last_empty); +} + +sub decode { + my $domain = shift; + return join(".", map ToUnicode($_), split(/\./, $domain, -1)) +} + +sub nameprep { # XXX real implementation missing + my $label = shift; + $label = lc($label); + return $label; +} + +sub check_size { + my $label = shift; + croak "Label empty" if $label eq ""; + croak "Label too long" if length($label) > 63; + return $label; +} + +sub ToASCII { + my $label = shift; + return check_size($label) if $label =~ $ASCII; + + # Step 2: nameprep + $label = nameprep($label); + # Step 3: UseSTD3ASCIIRules is false + # Step 4: try ASCII again + return check_size($label) if $label =~ $ASCII; + + # Step 5: Check ACE prefix + if ($label =~ /^xn--/) { + croak "Label starts with ACE prefix"; + } + + # Step 6: Encode with PUNYCODE + $label = encode_punycode($label); + + # Step 7: Prepend ACE prefix + $label = "xn--$label"; + + # Step 8: Check size + return check_size($label); +} + +sub ToUnicode { + my $label = shift; + $label = nameprep($label) unless $label =~ $ASCII; + return $label unless $label =~ /^xn--/; + my $result = decode_punycode(substr($label, 4)); + my $label2 = ToASCII($result); + if (lc($label) ne $label2) { + croak "IDNA does not round-trip: '\L$label\E' vs '$label2'"; + } + return $result; +} + +1; diff --git a/lib/URI/_ldap.pm b/lib/URI/_ldap.pm new file mode 100644 index 0000000..02f468c --- /dev/null +++ b/lib/URI/_ldap.pm @@ -0,0 +1,140 @@ +# Copyright (c) 1998 Graham Barr <gbarr@pobox.com>. All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package URI::_ldap; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use URI::Escape qw(uri_unescape); + +sub _ldap_elem { + my $self = shift; + my $elem = shift; + my $query = $self->query; + my @bits = (split(/\?/,defined($query) ? $query : ""),("")x4); + my $old = $bits[$elem]; + + if (@_) { + my $new = shift; + $new =~ s/\?/%3F/g; + $bits[$elem] = $new; + $query = join("?",@bits); + $query =~ s/\?+$//; + $query = undef unless length($query); + $self->query($query); + } + + $old; +} + +sub dn { + my $old = shift->path(@_); + $old =~ s:^/::; + uri_unescape($old); +} + +sub attributes { + my $self = shift; + my $old = _ldap_elem($self,0, @_ ? join(",", map { my $tmp = $_; $tmp =~ s/,/%2C/g; $tmp } @_) : ()); + return $old unless wantarray; + map { uri_unescape($_) } split(/,/,$old); +} + +sub _scope { + my $self = shift; + my $old = _ldap_elem($self,1, @_); + return undef unless defined wantarray && defined $old; + uri_unescape($old); +} + +sub scope { + my $old = &_scope; + $old = "base" unless length $old; + $old; +} + +sub _filter { + my $self = shift; + my $old = _ldap_elem($self,2, @_); + return undef unless defined wantarray && defined $old; + uri_unescape($old); # || "(objectClass=*)"; +} + +sub filter { + my $old = &_filter; + $old = "(objectClass=*)" unless length $old; + $old; +} + +sub extensions { + my $self = shift; + my @ext; + while (@_) { + my $key = shift; + my $value = shift; + push(@ext, join("=", map { $_="" unless defined; s/,/%2C/g; $_ } $key, $value)); + } + @ext = join(",", @ext) if @ext; + my $old = _ldap_elem($self,3, @ext); + return $old unless wantarray; + map { uri_unescape($_) } map { /^([^=]+)=(.*)$/ } split(/,/,$old); +} + +sub canonical +{ + my $self = shift; + my $other = $self->_nonldap_canonical; + + # The stuff below is not as efficient as one might hope... + + $other = $other->clone if $other == $self; + + $other->dn(_normalize_dn($other->dn)); + + # Should really know about mixed case "postalAddress", etc... + $other->attributes(map lc, $other->attributes); + + # Lowercase scope, remove default + my $old_scope = $other->scope; + my $new_scope = lc($old_scope); + $new_scope = "" if $new_scope eq "base"; + $other->scope($new_scope) if $new_scope ne $old_scope; + + # Remove filter if default + my $old_filter = $other->filter; + $other->filter("") if lc($old_filter) eq "(objectclass=*)" || + lc($old_filter) eq "objectclass=*"; + + # Lowercase extensions types and deal with known extension values + my @ext = $other->extensions; + for (my $i = 0; $i < @ext; $i += 2) { + my $etype = $ext[$i] = lc($ext[$i]); + if ($etype =~ /^!?bindname$/) { + $ext[$i+1] = _normalize_dn($ext[$i+1]); + } + } + $other->extensions(@ext) if @ext; + + $other; +} + +sub _normalize_dn # RFC 2253 +{ + my $dn = shift; + + return $dn; + # The code below will fail if the "+" or "," is embedding in a quoted + # string or simply escaped... + + my @dn = split(/([+,])/, $dn); + for (@dn) { + s/^([a-zA-Z]+=)/lc($1)/e; + } + join("", @dn); +} + +1; diff --git a/lib/URI/_login.pm b/lib/URI/_login.pm new file mode 100644 index 0000000..ef58e24 --- /dev/null +++ b/lib/URI/_login.pm @@ -0,0 +1,13 @@ +package URI::_login; + +use strict; +use warnings; + +use parent qw(URI::_server URI::_userpass); + +our $VERSION = "1.69"; + +# Generic terminal logins. This is used as a base class for 'telnet', +# 'tn3270', and 'rlogin' URL schemes. + +1; diff --git a/lib/URI/_punycode.pm b/lib/URI/_punycode.pm new file mode 100644 index 0000000..f54eee5 --- /dev/null +++ b/lib/URI/_punycode.pm @@ -0,0 +1,203 @@ +package URI::_punycode; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use Exporter 'import'; +our @EXPORT = qw(encode_punycode decode_punycode); + +use integer; + +our $DEBUG = 0; + +use constant BASE => 36; +use constant TMIN => 1; +use constant TMAX => 26; +use constant SKEW => 38; +use constant DAMP => 700; +use constant INITIAL_BIAS => 72; +use constant INITIAL_N => 128; + +my $Delimiter = chr 0x2D; +my $BasicRE = qr/[\x00-\x7f]/; + +sub _croak { require Carp; Carp::croak(@_); } + +sub digit_value { + my $code = shift; + return ord($code) - ord("A") if $code =~ /[A-Z]/; + return ord($code) - ord("a") if $code =~ /[a-z]/; + return ord($code) - ord("0") + 26 if $code =~ /[0-9]/; + return; +} + +sub code_point { + my $digit = shift; + return $digit + ord('a') if 0 <= $digit && $digit <= 25; + return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 36; + die 'NOT COME HERE'; +} + +sub adapt { + my($delta, $numpoints, $firsttime) = @_; + $delta = $firsttime ? $delta / DAMP : $delta / 2; + $delta += $delta / $numpoints; + my $k = 0; + while ($delta > ((BASE - TMIN) * TMAX) / 2) { + $delta /= BASE - TMIN; + $k += BASE; + } + return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW)); +} + +sub decode_punycode { + my $code = shift; + + my $n = INITIAL_N; + my $i = 0; + my $bias = INITIAL_BIAS; + my @output; + + if ($code =~ s/(.*)$Delimiter//o) { + push @output, map ord, split //, $1; + return _croak('non-basic code point') unless $1 =~ /^$BasicRE*$/o; + } + + while ($code) { + my $oldi = $i; + my $w = 1; + LOOP: + for (my $k = BASE; 1; $k += BASE) { + my $cp = substr($code, 0, 1, ''); + my $digit = digit_value($cp); + defined $digit or return _croak("invalid punycode input"); + $i += $digit * $w; + my $t = ($k <= $bias) ? TMIN + : ($k >= $bias + TMAX) ? TMAX : $k - $bias; + last LOOP if $digit < $t; + $w *= (BASE - $t); + } + $bias = adapt($i - $oldi, @output + 1, $oldi == 0); + warn "bias becomes $bias" if $DEBUG; + $n += $i / (@output + 1); + $i = $i % (@output + 1); + splice(@output, $i, 0, $n); + warn join " ", map sprintf('%04x', $_), @output if $DEBUG; + $i++; + } + return join '', map chr, @output; +} + +sub encode_punycode { + my $input = shift; + my @input = split //, $input; + + my $n = INITIAL_N; + my $delta = 0; + my $bias = INITIAL_BIAS; + + my @output; + my @basic = grep /$BasicRE/, @input; + my $h = my $b = @basic; + push @output, @basic; + push @output, $Delimiter if $b && $h < @input; + warn "basic codepoints: (@output)" if $DEBUG; + + while ($h < @input) { + my $m = min(grep { $_ >= $n } map ord, @input); + warn sprintf "next code point to insert is %04x", $m if $DEBUG; + $delta += ($m - $n) * ($h + 1); + $n = $m; + for my $i (@input) { + my $c = ord($i); + $delta++ if $c < $n; + if ($c == $n) { + my $q = $delta; + LOOP: + for (my $k = BASE; 1; $k += BASE) { + my $t = ($k <= $bias) ? TMIN : + ($k >= $bias + TMAX) ? TMAX : $k - $bias; + last LOOP if $q < $t; + my $cp = code_point($t + (($q - $t) % (BASE - $t))); + push @output, chr($cp); + $q = ($q - $t) / (BASE - $t); + } + push @output, chr(code_point($q)); + $bias = adapt($delta, $h + 1, $h == $b); + warn "bias becomes $bias" if $DEBUG; + $delta = 0; + $h++; + } + } + $delta++; + $n++; + } + return join '', @output; +} + +sub min { + my $min = shift; + for (@_) { $min = $_ if $_ <= $min } + return $min; +} + +1; +__END__ + +=head1 NAME + +URI::_punycode - encodes Unicode string in Punycode + +=head1 SYNOPSIS + + use URI::_punycode; + $punycode = encode_punycode($unicode); + $unicode = decode_punycode($punycode); + +=head1 DESCRIPTION + +URI::_punycode is a module to encode / decode Unicode strings into +Punycode, an efficient encoding of Unicode for use with IDNA. + +This module requires Perl 5.6.0 or over to handle UTF8 flagged Unicode +strings. + +=head1 FUNCTIONS + +This module exports following functions by default. + +=over 4 + +=item encode_punycode + + $punycode = encode_punycode($unicode); + +takes Unicode string (UTF8-flagged variable) and returns Punycode +encoding for it. + +=item decode_punycode + + $unicode = decode_punycode($punycode) + +takes Punycode encoding and returns original Unicode string. + +=back + +These functions throw exceptions on failure. You can catch 'em via +C<eval>. + +=head1 AUTHOR + +Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> is the author of +IDNA::Punycode v0.02 which was the basis for this module. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 SEE ALSO + +L<IDNA::Punycode>, RFC 3492 + +=cut diff --git a/lib/URI/_query.pm b/lib/URI/_query.pm new file mode 100644 index 0000000..a5976a3 --- /dev/null +++ b/lib/URI/_query.pm @@ -0,0 +1,97 @@ +package URI::_query; + +use strict; +use warnings; + +use URI (); +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub query +{ + my $self = shift; + $$self =~ m,^([^?\#]*)(?:\?([^\#]*))?(.*)$,s or die; + + if (@_) { + my $q = shift; + $$self = $1; + if (defined $q) { + $q =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($q); + $$self .= "?$q"; + } + $$self .= $3; + } + $2; +} + +# Handle ...?foo=bar&bar=foo type of query +sub query_form { + my $self = shift; + my $old = $self->query; + if (@_) { + # Try to set query string + my $delim; + my $r = $_[0]; + if (ref($r) eq "ARRAY") { + $delim = $_[1]; + @_ = @$r; + } + elsif (ref($r) eq "HASH") { + $delim = $_[1]; + @_ = map { $_ => $r->{$_} } sort keys %$r; + } + $delim = pop if @_ % 2; + + my @query; + while (my($key,$vals) = splice(@_, 0, 2)) { + $key = '' unless defined $key; + $key =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; + $key =~ s/ /+/g; + $vals = [ref($vals) eq "ARRAY" ? @$vals : $vals]; + for my $val (@$vals) { + $val = '' unless defined $val; + $val =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; + $val =~ s/ /+/g; + push(@query, "$key=$val"); + } + } + if (@query) { + unless ($delim) { + $delim = $1 if $old && $old =~ /([&;])/; + $delim ||= $URI::DEFAULT_QUERY_FORM_DELIMITER || "&"; + } + $self->query(join($delim, @query)); + } + else { + $self->query(undef); + } + } + return if !defined($old) || !length($old) || !defined(wantarray); + return unless $old =~ /=/; # not a form + map { s/\+/ /g; uri_unescape($_) } + map { /=/ ? split(/=/, $_, 2) : ($_ => '')} split(/[&;]/, $old); +} + +# Handle ...?dog+bones type of query +sub query_keywords +{ + my $self = shift; + my $old = $self->query; + if (@_) { + # Try to set query string + my @copy = @_; + @copy = @{$copy[0]} if @copy == 1 && ref($copy[0]) eq "ARRAY"; + for (@copy) { s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; } + $self->query(@copy ? join('+', @copy) : undef); + } + return if !defined($old) || !defined(wantarray); + return if $old =~ /=/; # not keywords, but a form + map { uri_unescape($_) } split(/\+/, $old, -1); +} + +# Some URI::URL compatibility stuff +sub equery { goto &query } + +1; diff --git a/lib/URI/_segment.pm b/lib/URI/_segment.pm new file mode 100644 index 0000000..3b27289 --- /dev/null +++ b/lib/URI/_segment.pm @@ -0,0 +1,24 @@ +package URI::_segment; + +# Represents a generic path_segment so that it can be treated as +# a string too. + +use strict; +use warnings; + +use URI::Escape qw(uri_unescape); + +use overload '""' => sub { $_[0]->[0] }, + fallback => 1; + +our $VERSION = "1.69"; + +sub new +{ + my $class = shift; + my @segment = split(';', shift, -1); + $segment[0] = uri_unescape($segment[0]); + bless \@segment, $class; +} + +1; diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm new file mode 100644 index 0000000..4df158e --- /dev/null +++ b/lib/URI/_server.pm @@ -0,0 +1,166 @@ +package URI::_server; + +use strict; +use warnings; + +use parent 'URI::_generic'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _uric_escape { + my($class, $str) = @_; + if ($str =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { + my($scheme, $host, $rest) = ($1, $2, $3); + my $ui = $host =~ s/(.*@)// ? $1 : ""; + my $port = $host =~ s/(:\d+)\z// ? $1 : ""; + if (_host_escape($host)) { + $str = "$scheme//$ui$host$port$rest"; + } + } + return $class->SUPER::_uric_escape($str); +} + +sub _host_escape { + return unless $_[0] =~ /[^$URI::uric]/; + eval { + require URI::_idna; + $_[0] = URI::_idna::encode($_[0]); + }; + return 0 if $@; + return 1; +} + +sub as_iri { + my $self = shift; + my $str = $self->SUPER::as_iri; + if ($str =~ /\bxn--/) { + if ($str =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { + my($scheme, $host, $rest) = ($1, $2, $3); + my $ui = $host =~ s/(.*@)// ? $1 : ""; + my $port = $host =~ s/(:\d+)\z// ? $1 : ""; + require URI::_idna; + $host = URI::_idna::decode($host); + $str = "$scheme//$ui$host$port$rest"; + } + } + return $str; +} + +sub userinfo +{ + my $self = shift; + my $old = $self->authority; + + if (@_) { + my $new = $old; + $new = "" unless defined $new; + $new =~ s/.*@//; # remove old stuff + my $ui = shift; + if (defined $ui) { + $ui =~ s/@/%40/g; # protect @ + $new = "$ui\@$new"; + } + $self->authority($new); + } + return undef if !defined($old) || $old !~ /(.*)@/; + return $1; +} + +sub host +{ + my $self = shift; + my $old = $self->authority; + if (@_) { + my $tmp = $old; + $tmp = "" unless defined $tmp; + my $ui = ($tmp =~ /(.*@)/) ? $1 : ""; + my $port = ($tmp =~ /(:\d+)$/) ? $1 : ""; + my $new = shift; + $new = "" unless defined $new; + if (length $new) { + $new =~ s/[@]/%40/g; # protect @ + if ($new =~ /^[^:]*:\d*\z/ || $new =~ /]:\d*\z/) { + $new =~ s/(:\d*)\z// || die "Assert"; + $port = $1; + } + $new = "[$new]" if $new =~ /:/ && $new !~ /^\[/; # IPv6 address + _host_escape($new); + } + $self->authority("$ui$new$port"); + } + return undef unless defined $old; + $old =~ s/.*@//; + $old =~ s/:\d+$//; # remove the port + $old =~ s{^\[(.*)\]$}{$1}; # remove brackets around IPv6 (RFC 3986 3.2.2) + return uri_unescape($old); +} + +sub ihost +{ + my $self = shift; + my $old = $self->host(@_); + if ($old =~ /(^|\.)xn--/) { + require URI::_idna; + $old = URI::_idna::decode($old); + } + return $old; +} + +sub _port +{ + my $self = shift; + my $old = $self->authority; + if (@_) { + my $new = $old; + $new =~ s/:\d*$//; + my $port = shift; + $new .= ":$port" if defined $port; + $self->authority($new); + } + return $1 if defined($old) && $old =~ /:(\d*)$/; + return; +} + +sub port +{ + my $self = shift; + my $port = $self->_port(@_); + $port = $self->default_port if !defined($port) || $port eq ""; + $port; +} + +sub host_port +{ + my $self = shift; + my $old = $self->authority; + $self->host(shift) if @_; + return undef unless defined $old; + $old =~ s/.*@//; # zap userinfo + $old =~ s/:$//; # empty port should be treated the same a no port + $old .= ":" . $self->port unless $old =~ /:\d+$/; + $old; +} + + +sub default_port { undef } + +sub canonical +{ + my $self = shift; + my $other = $self->SUPER::canonical; + my $host = $other->host || ""; + my $port = $other->_port; + my $uc_host = $host =~ /[A-Z]/; + my $def_port = defined($port) && ($port eq "" || + $port == $self->default_port); + if ($uc_host || $def_port) { + $other = $other->clone if $other == $self; + $other->host(lc $host) if $uc_host; + $other->port(undef) if $def_port; + } + $other; +} + +1; diff --git a/lib/URI/_userpass.pm b/lib/URI/_userpass.pm new file mode 100644 index 0000000..6d260c0 --- /dev/null +++ b/lib/URI/_userpass.pm @@ -0,0 +1,55 @@ +package URI::_userpass; + +use strict; +use warnings; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub user +{ + my $self = shift; + my $info = $self->userinfo; + if (@_) { + my $new = shift; + my $pass = defined($info) ? $info : ""; + $pass =~ s/^[^:]*//; + + if (!defined($new) && !length($pass)) { + $self->userinfo(undef); + } else { + $new = "" unless defined($new); + $new =~ s/%/%25/g; + $new =~ s/:/%3A/g; + $self->userinfo("$new$pass"); + } + } + return undef unless defined $info; + $info =~ s/:.*//; + uri_unescape($info); +} + +sub password +{ + my $self = shift; + my $info = $self->userinfo; + if (@_) { + my $new = shift; + my $user = defined($info) ? $info : ""; + $user =~ s/:.*//; + + if (!defined($new) && !length($user)) { + $self->userinfo(undef); + } else { + $new = "" unless defined($new); + $new =~ s/%/%25/g; + $self->userinfo("$user:$new"); + } + } + return undef unless defined $info; + return undef unless $info =~ s/^[^:]*://; + uri_unescape($info); +} + +1; diff --git a/lib/URI/data.pm b/lib/URI/data.pm new file mode 100644 index 0000000..7848502 --- /dev/null +++ b/lib/URI/data.pm @@ -0,0 +1,142 @@ +package URI::data; # RFC 2397 + +use strict; +use warnings; + +use parent 'URI'; + +our $VERSION = '1.69'; + +use MIME::Base64 qw(encode_base64 decode_base64); +use URI::Escape qw(uri_unescape); + +sub media_type +{ + my $self = shift; + my $opaque = $self->opaque; + $opaque =~ /^([^,]*),?/ or die; + my $old = $1; + my $base64; + $base64 = $1 if $old =~ s/(;base64)$//i; + if (@_) { + my $new = shift; + $new = "" unless defined $new; + $new =~ s/%/%25/g; + $new =~ s/,/%2C/g; + $base64 = "" unless defined $base64; + $opaque =~ s/^[^,]*,?/$new$base64,/; + $self->opaque($opaque); + } + return uri_unescape($old) if $old; # media_type can't really be "0" + "text/plain;charset=US-ASCII"; # default type +} + +sub data +{ + my $self = shift; + my($enc, $data) = split(",", $self->opaque, 2); + unless (defined $data) { + $data = ""; + $enc = "" unless defined $enc; + } + my $base64 = ($enc =~ /;base64$/i); + if (@_) { + $enc =~ s/;base64$//i if $base64; + my $new = shift; + $new = "" unless defined $new; + my $uric_count = _uric_count($new); + my $urienc_len = $uric_count + (length($new) - $uric_count) * 3; + my $base64_len = int((length($new)+2) / 3) * 4; + $base64_len += 7; # because of ";base64" marker + if ($base64_len < $urienc_len || $_[0]) { + $enc .= ";base64"; + $new = encode_base64($new, ""); + } else { + $new =~ s/%/%25/g; + } + $self->opaque("$enc,$new"); + } + return unless defined wantarray; + $data = uri_unescape($data); + return $base64 ? decode_base64($data) : $data; +} + +# I could not find a better way to interpolate the tr/// chars from +# a variable. +my $ENC = $URI::uric; +$ENC =~ s/%//; + +eval <<EOT; die $@ if $@; +sub _uric_count +{ + \$_[0] =~ tr/$ENC//; +} +EOT + +1; + +__END__ + +=head1 NAME + +URI::data - URI that contains immediate data + +=head1 SYNOPSIS + + use URI; + + $u = URI->new("data:"); + $u->media_type("image/gif"); + $u->data(scalar(`cat camel.gif`)); + print "$u\n"; + open(XV, "|xv -") and print XV $u->data; + +=head1 DESCRIPTION + +The C<URI::data> class supports C<URI> objects belonging to the I<data> +URI scheme. The I<data> URI scheme is specified in RFC 2397. It +allows inclusion of small data items as "immediate" data, as if it had +been included externally. Examples: + + data:,Perl%20is%20good + + data:image/gif;base64,R0lGODdhIAAgAIAAAAAAAPj8+CwAAAAAI + AAgAAAClYyPqcu9AJyCjtIKc5w5xP14xgeO2tlY3nWcajmZZdeJcG + Kxrmimms1KMTa1Wg8UROx4MNUq1HrycMjHT9b6xKxaFLM6VRKzI+p + KS9XtXpcbdun6uWVxJXA8pNPkdkkxhxc21LZHFOgD2KMoQXa2KMWI + JtnE2KizVUkYJVZZ1nczBxXlFopZBtoJ2diXGdNUymmJdFMAADs= + + + +C<URI> objects belonging to the data scheme support the common methods +(described in L<URI>) and the following two scheme-specific methods: + +=over 4 + +=item $uri->media_type( [$new_media_type] ) + +Can be used to get or set the media type specified in the +URI. If no media type is specified, then the default +C<"text/plain;charset=US-ASCII"> is returned. + +=item $uri->data( [$new_data] ) + +Can be used to get or set the data contained in the URI. +The data is passed unescaped (in binary form). The decision about +whether to base64 encode the data in the URI is taken automatically, +based on the encoding that produces the shorter URI string. + +=back + +=head1 SEE ALSO + +L<URI> + +=head1 COPYRIGHT + +Copyright 1995-1998 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/URI/file.pm b/lib/URI/file.pm new file mode 100644 index 0000000..d76ddf2 --- /dev/null +++ b/lib/URI/file.pm @@ -0,0 +1,327 @@ +package URI::file; + +use strict; +use warnings; + +use parent 'URI::_generic'; +our $VERSION = "4.21"; + +use URI::Escape qw(uri_unescape); + +our $DEFAULT_AUTHORITY = ""; + +# Map from $^O values to implementation classes. The Unix +# class is the default. +our %OS_CLASS = ( + os2 => "OS2", + mac => "Mac", + MacOS => "Mac", + MSWin32 => "Win32", + win32 => "Win32", + msdos => "FAT", + dos => "FAT", + qnx => "QNX", +); + +sub os_class +{ + my($OS) = shift || $^O; + + my $class = "URI::file::" . ($OS_CLASS{$OS} || "Unix"); + no strict 'refs'; + unless (%{"$class\::"}) { + eval "require $class"; + die $@ if $@; + } + $class; +} + +sub host { uri_unescape(shift->authority(@_)) } + +sub new +{ + my($class, $path, $os) = @_; + os_class($os)->new($path); +} + +sub new_abs +{ + my $class = shift; + my $file = $class->new(@_); + return $file->abs($class->cwd) unless $$file =~ /^file:/; + $file; +} + +sub cwd +{ + my $class = shift; + require Cwd; + my $cwd = Cwd::cwd(); + $cwd = VMS::Filespec::unixpath($cwd) if $^O eq 'VMS'; + $cwd = $class->new($cwd); + $cwd .= "/" unless substr($cwd, -1, 1) eq "/"; + $cwd; +} + +sub canonical { + my $self = shift; + my $other = $self->SUPER::canonical; + + my $scheme = $other->scheme; + my $auth = $other->authority; + return $other if !defined($scheme) && !defined($auth); # relative + + if (!defined($auth) || + $auth eq "" || + lc($auth) eq "localhost" || + (defined($DEFAULT_AUTHORITY) && lc($auth) eq lc($DEFAULT_AUTHORITY)) + ) + { + # avoid cloning if $auth already match + if ((defined($auth) || defined($DEFAULT_AUTHORITY)) && + (!defined($auth) || !defined($DEFAULT_AUTHORITY) || $auth ne $DEFAULT_AUTHORITY) + ) + { + $other = $other->clone if $self == $other; + $other->authority($DEFAULT_AUTHORITY); + } + } + + $other; +} + +sub file +{ + my($self, $os) = @_; + os_class($os)->file($self); +} + +sub dir +{ + my($self, $os) = @_; + os_class($os)->dir($self); +} + +1; + +__END__ + +=head1 NAME + +URI::file - URI that maps to local file names + +=head1 SYNOPSIS + + use URI::file; + + $u1 = URI->new("file:/foo/bar"); + $u2 = URI->new("foo/bar", "file"); + + $u3 = URI::file->new($path); + $u4 = URI::file->new("c:\\windows\\", "win32"); + + $u1->file; + $u1->file("mac"); + +=head1 DESCRIPTION + +The C<URI::file> class supports C<URI> objects belonging to the I<file> +URI scheme. This scheme allows us to map the conventional file names +found on various computer systems to the URI name space. An old +specification of the I<file> URI scheme is found in RFC 1738. Some +older background information is also in RFC 1630. There are no newer +specifications as far as I know. + +If you simply want to construct I<file> URI objects from URI strings, +use the normal C<URI> constructor. If you want to construct I<file> +URI objects from the actual file names used by various systems, then +use one of the following C<URI::file> constructors: + +=over 4 + +=item $u = URI::file->new( $filename, [$os] ) + +Maps a file name to the I<file:> URI name space, creates a URI object +and returns it. The $filename is interpreted as belonging to the +indicated operating system ($os), which defaults to the value of the +$^O variable. The $filename can be either absolute or relative, and +the corresponding type of URI object for $os is returned. + +=item $u = URI::file->new_abs( $filename, [$os] ) + +Same as URI::file->new, but makes sure that the URI returned +represents an absolute file name. If the $filename argument is +relative, then the name is resolved relative to the current directory, +i.e. this constructor is really the same as: + + URI::file->new($filename)->abs(URI::file->cwd); + +=item $u = URI::file->cwd + +Returns a I<file> URI that represents the current working directory. +See L<Cwd>. + +=back + +The following methods are supported for I<file> URI (in addition to +the common and generic methods described in L<URI>): + +=over 4 + +=item $u->file( [$os] ) + +Returns a file name. It maps from the URI name space +to the file name space of the indicated operating system. + +It might return C<undef> if the name can not be represented in the +indicated file system. + +=item $u->dir( [$os] ) + +Some systems use a different form for names of directories than for plain +files. Use this method if you know you want to use the name for +a directory. + +=back + +The C<URI::file> module can be used to map generic file names to names +suitable for the current system. As such, it can work as a nice +replacement for the C<File::Spec> module. For instance, the following +code translates the UNIX-style file name F<Foo/Bar.pm> to a name +suitable for the local system: + + $file = URI::file->new("Foo/Bar.pm", "unix")->file; + die "Can't map filename Foo/Bar.pm for $^O" unless defined $file; + open(FILE, $file) || die "Can't open '$file': $!"; + # do something with FILE + +=head1 MAPPING NOTES + +Most computer systems today have hierarchically organized file systems. +Mapping the names used in these systems to the generic URI syntax +allows us to work with relative file URIs that behave as they should +when resolved using the generic algorithm for URIs (specified in RFC +2396). Mapping a file name to the generic URI syntax involves mapping +the path separator character to "/" and encoding any reserved +characters that appear in the path segments of the file name. If +path segments consisting of the strings "." or ".." have a +different meaning than what is specified for generic URIs, then these +must be encoded as well. + +If the file system has device, volume or drive specifications as +the root of the name space, then it makes sense to map them to the +authority field of the generic URI syntax. This makes sure that +relative URIs can not be resolved "above" them, i.e. generally how +relative file names work in those systems. + +Another common use of the authority field is to encode the host on which +this file name is valid. The host name "localhost" is special and +generally has the same meaning as a missing or empty authority +field. This use is in conflict with using it as a device +specification, but can often be resolved for device specifications +having characters not legal in plain host names. + +File name to URI mapping in normally not one-to-one. There are +usually many URIs that map to any given file name. For instance, an +authority of "localhost" maps the same as a URI with a missing or empty +authority. + +Example 1: The Mac classic (Mac OS 9 and earlier) used ":" as path separator, +but not in the same way as a generic URI. ":foo" was a relative name. "foo:bar" +was an absolute name. Also, path segments could contain the "/" character as well +as the literal "." or "..". So the mapping looks like this: + + Mac classic URI + ---------- ------------------- + :foo:bar <==> foo/bar + : <==> ./ + ::foo:bar <==> ../foo/bar + ::: <==> ../../ + foo:bar <==> file:/foo/bar + foo:bar: <==> file:/foo/bar/ + .. <==> %2E%2E + <undef> <== / + foo/ <== file:/foo%2F + ./foo.txt <== file:/.%2Ffoo.txt + +Note that if you want a relative URL, you *must* begin the path with a :. Any +path that begins with [^:] is treated as absolute. + +Example 2: The UNIX file system is easy to map, as it uses the same path +separator as URIs, has a single root, and segments of "." and ".." +have the same meaning. URIs that have the character "\0" or "/" as +part of any path segment can not be turned into valid UNIX file names. + + UNIX URI + ---------- ------------------ + foo/bar <==> foo/bar + /foo/bar <==> file:/foo/bar + /foo/bar <== file://localhost/foo/bar + file: ==> ./file: + <undef> <== file:/fo%00/bar + / <==> file:/ + +=cut + + +RFC 1630 + + [...] + + There is clearly a danger of confusion that a link made to a local + file should be followed by someone on a different system, with + unexpected and possibly harmful results. Therefore, the convention + is that even a "file" URL is provided with a host part. This allows + a client on another system to know that it cannot access the file + system, or perhaps to use some other local mechanism to access the + file. + + The special value "localhost" is used in the host field to indicate + that the filename should really be used on whatever host one is. + This for example allows links to be made to files which are + distributed on many machines, or to "your unix local password file" + subject of course to consistency across the users of the data. + + A void host field is equivalent to "localhost". + +=head1 CONFIGURATION VARIABLES + +The following configuration variables influence how the class and its +methods behave: + +=over + +=item %URI::file::OS_CLASS + +This hash maps OS identifiers to implementation classes. You might +want to add or modify this if you want to plug in your own file +handler class. Normally the keys should match the $^O values in use. + +If there is no mapping then the "Unix" implementation is used. + +=item $URI::file::DEFAULT_AUTHORITY + +This determine what "authority" string to include in absolute file +URIs. It defaults to "". If you prefer verbose URIs you might set it +to be "localhost". + +Setting this value to C<undef> force behaviour compatible to URI v1.31 +and earlier. In this mode host names in UNC paths and drive letters +are mapped to the authority component on Windows, while we produce +authority-less URIs on Unix. + +=back + + +=head1 SEE ALSO + +L<URI>, L<File::Spec>, L<perlport> + +=head1 COPYRIGHT + +Copyright 1995-1998,2004 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/URI/file/Base.pm b/lib/URI/file/Base.pm new file mode 100644 index 0000000..ebf24aa --- /dev/null +++ b/lib/URI/file/Base.pm @@ -0,0 +1,84 @@ +package URI::file::Base; + +use strict; +use warnings; + +use URI::Escape qw(); + +our $VERSION = "1.69"; + +sub new +{ + my $class = shift; + my $path = shift; + $path = "" unless defined $path; + + my($auth, $escaped_auth, $escaped_path); + + ($auth, $escaped_auth) = $class->_file_extract_authority($path); + ($path, $escaped_path) = $class->_file_extract_path($path); + + if (defined $auth) { + $auth =~ s,%,%25,g unless $escaped_auth; + $auth =~ s,([/?\#]), URI::Escape::escape_char($1),eg; + $auth = "//$auth"; + if (defined $path) { + $path = "/$path" unless substr($path, 0, 1) eq "/"; + } else { + $path = ""; + } + } else { + return undef unless defined $path; + $auth = ""; + } + + $path =~ s,([%;?]), URI::Escape::escape_char($1),eg unless $escaped_path; + $path =~ s/\#/%23/g; + + my $uri = $auth . $path; + $uri = "file:$uri" if substr($uri, 0, 1) eq "/"; + + URI->new($uri, "file"); +} + +sub _file_extract_authority +{ + my($class, $path) = @_; + return undef unless $class->_file_is_absolute($path); + return $URI::file::DEFAULT_AUTHORITY; +} + +sub _file_extract_path +{ + return undef; +} + +sub _file_is_absolute +{ + return 0; +} + +sub _file_is_localhost +{ + shift; # class + my $host = lc(shift); + return 1 if $host eq "localhost"; + eval { + require Net::Domain; + lc(Net::Domain::hostfqdn()) eq $host || + lc(Net::Domain::hostname()) eq $host; + }; +} + +sub file +{ + undef; +} + +sub dir +{ + my $self = shift; + $self->file(@_); +} + +1; diff --git a/lib/URI/file/FAT.pm b/lib/URI/file/FAT.pm new file mode 100644 index 0000000..2d78275 --- /dev/null +++ b/lib/URI/file/FAT.pm @@ -0,0 +1,27 @@ +package URI::file::FAT; + +use strict; +use warnings; + +use parent 'URI::file::Win32'; + +our $VERSION = "1.69"; + +sub fix_path +{ + shift; # class + for (@_) { + # turn it into 8.3 names + my @p = map uc, split(/\./, $_, -1); + return if @p > 2; # more than 1 dot is not allowed + @p = ("") unless @p; # split bug? (returns nothing when splitting "") + $_ = substr($p[0], 0, 8); + if (@p > 1) { + my $ext = substr($p[1], 0, 3); + $_ .= ".$ext" if length $ext; + } + } + 1; # ok +} + +1; diff --git a/lib/URI/file/Mac.pm b/lib/URI/file/Mac.pm new file mode 100644 index 0000000..4d8e766 --- /dev/null +++ b/lib/URI/file/Mac.pm @@ -0,0 +1,121 @@ +package URI::file::Mac; + +use strict; +use warnings; + +use parent 'URI::file::Base'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _file_extract_path +{ + my $class = shift; + my $path = shift; + + my @pre; + if ($path =~ s/^(:+)//) { + if (length($1) == 1) { + @pre = (".") unless length($path); + } else { + @pre = ("..") x (length($1) - 1); + } + } else { #absolute + $pre[0] = ""; + } + + my $isdir = ($path =~ s/:$//); + $path =~ s,([%/;]), URI::Escape::escape_char($1),eg; + + my @path = split(/:/, $path, -1); + for (@path) { + if ($_ eq "." || $_ eq "..") { + $_ = "%2E" x length($_); + } + $_ = ".." unless length($_); + } + push (@path,"") if $isdir; + (join("/", @pre, @path), 1); +} + + +sub file +{ + my $class = shift; + my $uri = shift; + my @path; + + my $auth = $uri->authority; + if (defined $auth) { + if (lc($auth) ne "localhost" && $auth ne "") { + my $u_auth = uri_unescape($auth); + if (!$class->_file_is_localhost($u_auth)) { + # some other host (use it as volume name) + @path = ("", $auth); + # XXX or just return to make it illegal; + } + } + } + my @ps = split("/", $uri->path, -1); + shift @ps if @path; + push(@path, @ps); + + my $pre = ""; + if (!@path) { + return; # empty path; XXX return ":" instead? + } elsif ($path[0] eq "") { + # absolute + shift(@path); + if (@path == 1) { + return if $path[0] eq ""; # not root directory + push(@path, ""); # volume only, effectively append ":" + } + @ps = @path; + @path = (); + my $part; + for (@ps) { #fix up "." and "..", including interior, in relatives + next if $_ eq "."; + $part = $_ eq ".." ? "" : $_; + push(@path,$part); + } + if ($ps[-1] eq "..") { #if this happens, we need another : + push(@path,""); + } + + } else { + $pre = ":"; + @ps = @path; + @path = (); + my $part; + for (@ps) { #fix up "." and "..", including interior, in relatives + next if $_ eq "."; + $part = $_ eq ".." ? "" : $_; + push(@path,$part); + } + if ($ps[-1] eq "..") { #if this happens, we need another : + push(@path,""); + } + + } + return unless $pre || @path; + for (@path) { + s/;.*//; # get rid of parameters + #return unless length; # XXX + $_ = uri_unescape($_); + return if /\0/; + return if /:/; # Should we? + } + $pre . join(":", @path); +} + +sub dir +{ + my $class = shift; + my $path = $class->file(@_); + return unless defined $path; + $path .= ":" unless $path =~ /:$/; + $path; +} + +1; diff --git a/lib/URI/file/OS2.pm b/lib/URI/file/OS2.pm new file mode 100644 index 0000000..f451f9e --- /dev/null +++ b/lib/URI/file/OS2.pm @@ -0,0 +1,32 @@ +package URI::file::OS2; + +use strict; +use warnings; + +use parent 'URI::file::Win32'; + +our $VERSION = "1.69"; + +# The Win32 version translates k:/foo to file://k:/foo (?!) +# We add an empty host + +sub _file_extract_authority +{ + my $class = shift; + return $1 if $_[0] =~ s,^\\\\([^\\]+),,; # UNC + return $1 if $_[0] =~ s,^//([^/]+),,; # UNC too? + + if ($_[0] =~ m#^[a-zA-Z]{1,2}:#) { # allow for ab: drives + return ""; + } + return; +} + +sub file { + my $p = &URI::file::Win32::file; + return unless defined $p; + $p =~ s,\\,/,g; + $p; +} + +1; diff --git a/lib/URI/file/QNX.pm b/lib/URI/file/QNX.pm new file mode 100644 index 0000000..1d725e4 --- /dev/null +++ b/lib/URI/file/QNX.pm @@ -0,0 +1,20 @@ +package URI::file::QNX; + +use strict; +use warnings; + +use parent 'URI::file::Unix'; + +our $VERSION = "1.69"; + +sub _file_extract_path +{ + my($class, $path) = @_; + # tidy path + $path =~ s,(.)//+,$1/,g; # ^// is correct + $path =~ s,(/\.)+/,/,g; + $path = "./$path" if $path =~ m,^[^:/]+:,,; # look like "scheme:" + $path; +} + +1; diff --git a/lib/URI/file/Unix.pm b/lib/URI/file/Unix.pm new file mode 100644 index 0000000..b06acc7 --- /dev/null +++ b/lib/URI/file/Unix.pm @@ -0,0 +1,58 @@ +package URI::file::Unix; + +use strict; +use warnings; + +use parent 'URI::file::Base'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _file_extract_path +{ + my($class, $path) = @_; + + # tidy path + $path =~ s,//+,/,g; + $path =~ s,(/\.)+/,/,g; + $path = "./$path" if $path =~ m,^[^:/]+:,,; # look like "scheme:" + + return $path; +} + +sub _file_is_absolute { + my($class, $path) = @_; + return $path =~ m,^/,; +} + +sub file +{ + my $class = shift; + my $uri = shift; + my @path; + + my $auth = $uri->authority; + if (defined($auth)) { + if (lc($auth) ne "localhost" && $auth ne "") { + $auth = uri_unescape($auth); + unless ($class->_file_is_localhost($auth)) { + push(@path, "", "", $auth); + } + } + } + + my @ps = $uri->path_segments; + shift @ps if @path; + push(@path, @ps); + + for (@path) { + # Unix file/directory names are not allowed to contain '\0' or '/' + return undef if /\0/; + return undef if /\//; # should we really? + } + + return join("/", @path); +} + +1; diff --git a/lib/URI/file/Win32.pm b/lib/URI/file/Win32.pm new file mode 100644 index 0000000..f00b9c9 --- /dev/null +++ b/lib/URI/file/Win32.pm @@ -0,0 +1,87 @@ +package URI::file::Win32; + +use strict; +use warnings; + +use parent 'URI::file::Base'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _file_extract_authority +{ + my $class = shift; + + return $class->SUPER::_file_extract_authority($_[0]) + if defined $URI::file::DEFAULT_AUTHORITY; + + return $1 if $_[0] =~ s,^\\\\([^\\]+),,; # UNC + return $1 if $_[0] =~ s,^//([^/]+),,; # UNC too? + + if ($_[0] =~ s,^([a-zA-Z]:),,) { + my $auth = $1; + $auth .= "relative" if $_[0] !~ m,^[\\/],; + return $auth; + } + return undef; +} + +sub _file_extract_path +{ + my($class, $path) = @_; + $path =~ s,\\,/,g; + #$path =~ s,//+,/,g; + $path =~ s,(/\.)+/,/,g; + + if (defined $URI::file::DEFAULT_AUTHORITY) { + $path =~ s,^([a-zA-Z]:),/$1,; + } + + return $path; +} + +sub _file_is_absolute { + my($class, $path) = @_; + return $path =~ m,^[a-zA-Z]:, || $path =~ m,^[/\\],; +} + +sub file +{ + my $class = shift; + my $uri = shift; + my $auth = $uri->authority; + my $rel; # is filename relative to drive specified in authority + if (defined $auth) { + $auth = uri_unescape($auth); + if ($auth =~ /^([a-zA-Z])[:|](relative)?/) { + $auth = uc($1) . ":"; + $rel++ if $2; + } elsif (lc($auth) eq "localhost") { + $auth = ""; + } elsif (length $auth) { + $auth = "\\\\" . $auth; # UNC + } + } else { + $auth = ""; + } + + my @path = $uri->path_segments; + for (@path) { + return undef if /\0/; + return undef if /\//; + #return undef if /\\/; # URLs with "\" is not uncommon + } + return undef unless $class->fix_path(@path); + + my $path = join("\\", @path); + $path =~ s/^\\// if $rel; + $path = $auth . $path; + $path =~ s,^\\([a-zA-Z])[:|],\u$1:,; + + return $path; +} + +sub fix_path { 1; } + +1; diff --git a/lib/URI/ftp.pm b/lib/URI/ftp.pm new file mode 100644 index 0000000..7d6848f --- /dev/null +++ b/lib/URI/ftp.pm @@ -0,0 +1,46 @@ +package URI::ftp; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_server URI::_userpass); + +sub default_port { 21 } + +sub path { shift->path_query(@_) } # XXX + +sub _user { shift->SUPER::user(@_); } +sub _password { shift->SUPER::password(@_); } + +sub user +{ + my $self = shift; + my $user = $self->_user(@_); + $user = "anonymous" unless defined $user; + $user; +} + +sub password +{ + my $self = shift; + my $pass = $self->_password(@_); + unless (defined $pass) { + my $user = $self->user; + if ($user eq 'anonymous' || $user eq 'ftp') { + # anonymous ftp login password + # If there is no ftp anonymous password specified + # then we'll just use 'anonymous@' + # We don't try to send the read e-mail address because: + # - We want to remain anonymous + # - We want to stop SPAM + # - We don't want to let ftp sites to discriminate by the user, + # host, country or ftp client being used. + $pass = 'anonymous@'; + } + } + $pass; +} + +1; diff --git a/lib/URI/gopher.pm b/lib/URI/gopher.pm new file mode 100644 index 0000000..d9f7eb5 --- /dev/null +++ b/lib/URI/gopher.pm @@ -0,0 +1,97 @@ +package URI::gopher; # <draft-murali-url-gopher>, Dec 4, 1996 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +use URI::Escape qw(uri_unescape); + +# A Gopher URL follows the common internet scheme syntax as defined in +# section 4.3 of [RFC-URL-SYNTAX]: +# +# gopher://<host>[:<port>]/<gopher-path> +# +# where +# +# <gopher-path> := <gopher-type><selector> | +# <gopher-type><selector>%09<search> | +# <gopher-type><selector>%09<search>%09<gopher+_string> +# +# <gopher-type> := '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' +# '8' | '9' | '+' | 'I' | 'g' | 'T' +# +# <selector> := *pchar Refer to RFC 1808 [4] +# <search> := *pchar +# <gopher+_string> := *uchar Refer to RFC 1738 [3] +# +# If the optional port is omitted, the port defaults to 70. + +sub default_port { 70 } + +sub _gopher_type +{ + my $self = shift; + my $path = $self->path_query; + $path =~ s,^/,,; + my $gtype = $1 if $path =~ s/^(.)//s; + if (@_) { + my $new_type = shift; + if (defined($new_type)) { + Carp::croak("Bad gopher type '$new_type'") + unless length($new_type) == 1; + substr($path, 0, 0) = $new_type; + $self->path_query($path); + } else { + Carp::croak("Can't delete gopher type when selector is present") + if length($path); + $self->path_query(undef); + } + } + return $gtype; +} + +sub gopher_type +{ + my $self = shift; + my $gtype = $self->_gopher_type(@_); + $gtype = "1" unless defined $gtype; + $gtype; +} + +sub gtype { goto &gopher_type } # URI::URL compatibility + +sub selector { shift->_gfield(0, @_) } +sub search { shift->_gfield(1, @_) } +sub string { shift->_gfield(2, @_) } + +sub _gfield +{ + my $self = shift; + my $fno = shift; + my $path = $self->path_query; + + # not according to spec., but many popular browsers accept + # gopher URLs with a '?' before the search string. + $path =~ s/\?/\t/; + $path = uri_unescape($path); + $path =~ s,^/,,; + my $gtype = $1 if $path =~ s,^(.),,s; + my @path = split(/\t/, $path, 3); + if (@_) { + # modify + my $new = shift; + $path[$fno] = $new; + pop(@path) while @path && !defined($path[-1]); + for (@path) { $_="" unless defined } + $path = $gtype; + $path = "1" unless defined $path; + $path .= join("\t", @path); + $self->path_query($path); + } + $path[$fno]; +} + +1; diff --git a/lib/URI/http.pm b/lib/URI/http.pm new file mode 100644 index 0000000..a7e921a --- /dev/null +++ b/lib/URI/http.pm @@ -0,0 +1,27 @@ +package URI::http; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +sub default_port { 80 } + +sub canonical +{ + my $self = shift; + my $other = $self->SUPER::canonical; + + my $slash_path = defined($other->authority) && + !length($other->path) && !defined($other->query); + + if ($slash_path) { + $other = $other->clone if $other == $self; + $other->path("/"); + } + $other; +} + +1; diff --git a/lib/URI/https.pm b/lib/URI/https.pm new file mode 100644 index 0000000..e346b30 --- /dev/null +++ b/lib/URI/https.pm @@ -0,0 +1,14 @@ +package URI::https; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::http'; + +sub default_port { 443 } + +sub secure { 1 } + +1; diff --git a/lib/URI/ldap.pm b/lib/URI/ldap.pm new file mode 100644 index 0000000..60e7f2b --- /dev/null +++ b/lib/URI/ldap.pm @@ -0,0 +1,120 @@ +# Copyright (c) 1998 Graham Barr <gbarr@pobox.com>. All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package URI::ldap; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_ldap URI::_server); + +sub default_port { 389 } + +sub _nonldap_canonical { + my $self = shift; + $self->URI::_server::canonical(@_); +} + +1; + +__END__ + +=head1 NAME + +URI::ldap - LDAP Uniform Resource Locators + +=head1 SYNOPSIS + + use URI; + + $uri = URI->new("ldap:$uri_string"); + $dn = $uri->dn; + $filter = $uri->filter; + @attr = $uri->attributes; + $scope = $uri->scope; + %extn = $uri->extensions; + + $uri = URI->new("ldap:"); # start empty + $uri->host("ldap.itd.umich.edu"); + $uri->dn("o=University of Michigan,c=US"); + $uri->attributes(qw(postalAddress)); + $uri->scope('sub'); + $uri->filter('(cn=Babs Jensen)'); + print $uri->as_string,"\n"; + +=head1 DESCRIPTION + +C<URI::ldap> provides an interface to parse an LDAP URI into its +constituent parts and also to build a URI as described in +RFC 2255. + +=head1 METHODS + +C<URI::ldap> supports all the generic and server methods defined by +L<URI>, plus the following. + +Each of the following methods can be used to set or get the value in +the URI. The values are passed in unescaped form. None of these +return undefined values, but elements without a default can be empty. +If arguments are given, then a new value is set for the given part +of the URI. + +=over 4 + +=item $uri->dn( [$new_dn] ) + +Sets or gets the I<Distinguished Name> part of the URI. The DN +identifies the base object of the LDAP search. + +=item $uri->attributes( [@new_attrs] ) + +Sets or gets the list of attribute names which are +returned by the search. + +=item $uri->scope( [$new_scope] ) + +Sets or gets the scope to be used by the search. The value can be one of +C<"base">, C<"one"> or C<"sub">. If none is given in the URI then the +return value defaults to C<"base">. + +=item $uri->_scope( [$new_scope] ) + +Same as scope(), but does not default to anything. + +=item $uri->filter( [$new_filter] ) + +Sets or gets the filter to be used by the search. If none is given in +the URI then the return value defaults to C<"(objectClass=*)">. + +=item $uri->_filter( [$new_filter] ) + +Same as filter(), but does not default to anything. + +=item $uri->extensions( [$etype => $evalue,...] ) + +Sets or gets the extensions used for the search. The list passed should +be in the form etype1 => evalue1, etype2 => evalue2,... This is also +the form of list that is returned. + +=back + +=head1 SEE ALSO + +L<http://tools.ietf.org/html/rfc2255> + +=head1 AUTHOR + +Graham Barr E<lt>F<gbarr@pobox.com>E<gt> + +Slightly modified by Gisle Aas to fit into the URI distribution. + +=head1 COPYRIGHT + +Copyright (c) 1998 Graham Barr. All rights reserved. This program is +free software; you can redistribute it and/or modify it under the same +terms as Perl itself. + +=cut diff --git a/lib/URI/ldapi.pm b/lib/URI/ldapi.pm new file mode 100644 index 0000000..12a0b08 --- /dev/null +++ b/lib/URI/ldapi.pm @@ -0,0 +1,29 @@ +package URI::ldapi; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_ldap URI::_generic); + +require URI::Escape; + +sub un_path { + my $self = shift; + my $old = URI::Escape::uri_unescape($self->authority); + if (@_) { + my $p = shift; + $p =~ s/:/%3A/g; + $p =~ s/\@/%40/g; + $self->authority($p); + } + return $old; +} + +sub _nonldap_canonical { + my $self = shift; + $self->URI::_generic::canonical(@_); +} + +1; diff --git a/lib/URI/ldaps.pm b/lib/URI/ldaps.pm new file mode 100644 index 0000000..80a96d3 --- /dev/null +++ b/lib/URI/ldaps.pm @@ -0,0 +1,14 @@ +package URI::ldaps; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::ldap'; + +sub default_port { 636 } + +sub secure { 1 } + +1; diff --git a/lib/URI/mailto.pm b/lib/URI/mailto.pm new file mode 100644 index 0000000..0e94463 --- /dev/null +++ b/lib/URI/mailto.pm @@ -0,0 +1,73 @@ +package URI::mailto; # RFC 2368 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI URI::_query); + +sub to +{ + my $self = shift; + my @old = $self->headers; + if (@_) { + my @new = @old; + # get rid of any other to: fields + for (my $i = 0; $i < @new; $i += 2) { + if (lc($new[$i] || '') eq "to") { + splice(@new, $i, 2); + redo; + } + } + + my $to = shift; + $to = "" unless defined $to; + unshift(@new, "to" => $to); + $self->headers(@new); + } + return unless defined wantarray; + + my @to; + while (@old) { + my $h = shift @old; + my $v = shift @old; + push(@to, $v) if lc($h) eq "to"; + } + join(",", @to); +} + + +sub headers +{ + my $self = shift; + + # The trick is to just treat everything as the query string... + my $opaque = "to=" . $self->opaque; + $opaque =~ s/\?/&/; + + if (@_) { + my @new = @_; + + # strip out any "to" fields + my @to; + for (my $i=0; $i < @new; $i += 2) { + if (lc($new[$i] || '') eq "to") { + push(@to, (splice(@new, $i, 2))[1]); # remove header + redo; + } + } + + my $new = join(",",@to); + $new =~ s/%/%25/g; + $new =~ s/\?/%3F/g; + $self->opaque($new); + $self->query_form(@new) if @new; + } + return unless defined wantarray; + + # I am lazy today... + URI->new("mailto:?$opaque")->query_form; +} + +1; diff --git a/lib/URI/mms.pm b/lib/URI/mms.pm new file mode 100644 index 0000000..c9af387 --- /dev/null +++ b/lib/URI/mms.pm @@ -0,0 +1,12 @@ +package URI::mms; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::http'; + +sub default_port { 1755 } + +1; diff --git a/lib/URI/news.pm b/lib/URI/news.pm new file mode 100644 index 0000000..77e2c18 --- /dev/null +++ b/lib/URI/news.pm @@ -0,0 +1,71 @@ +package URI::news; # draft-gilman-news-url-01 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +use URI::Escape qw(uri_unescape); +use Carp (); + +sub default_port { 119 } + +# newsURL = scheme ":" [ news-server ] [ refbygroup | message ] +# scheme = "news" | "snews" | "nntp" +# news-server = "//" server "/" +# refbygroup = group [ "/" messageno [ "-" messageno ] ] +# message = local-part "@" domain + +sub _group +{ + my $self = shift; + my $old = $self->path; + if (@_) { + my($group,$from,$to) = @_; + if ($group =~ /\@/) { + $group =~ s/^<(.*)>$/$1/; # "<" and ">" should not be part of it + } + $group =~ s,%,%25,g; + $group =~ s,/,%2F,g; + my $path = $group; + if (defined $from) { + $path .= "/$from"; + $path .= "-$to" if defined $to; + } + $self->path($path); + } + + $old =~ s,^/,,; + if ($old !~ /\@/ && $old =~ s,/(.*),, && wantarray) { + my $extra = $1; + return (uri_unescape($old), split(/-/, $extra)); + } + uri_unescape($old); +} + + +sub group +{ + my $self = shift; + if (@_) { + Carp::croak("Group name can't contain '\@'") if $_[0] =~ /\@/; + } + my @old = $self->_group(@_); + return if $old[0] =~ /\@/; + wantarray ? @old : $old[0]; +} + +sub message +{ + my $self = shift; + if (@_) { + Carp::croak("Message must contain '\@'") unless $_[0] =~ /\@/; + } + my $old = $self->_group(@_); + return undef unless $old =~ /\@/; + return $old; +} + +1; diff --git a/lib/URI/nntp.pm b/lib/URI/nntp.pm new file mode 100644 index 0000000..73dc629 --- /dev/null +++ b/lib/URI/nntp.pm @@ -0,0 +1,10 @@ +package URI::nntp; # draft-gilman-news-url-01 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::news'; + +1; diff --git a/lib/URI/pop.pm b/lib/URI/pop.pm new file mode 100644 index 0000000..cfc5e11 --- /dev/null +++ b/lib/URI/pop.pm @@ -0,0 +1,71 @@ +package URI::pop; # RFC 2384 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +use URI::Escape qw(uri_unescape); + +sub default_port { 110 } + +#pop://<user>;auth=<auth>@<host>:<port> + +sub user +{ + my $self = shift; + my $old = $self->userinfo; + + if (@_) { + my $new_info = $old; + $new_info = "" unless defined $new_info; + $new_info =~ s/^[^;]*//; + + my $new = shift; + if (!defined($new) && !length($new_info)) { + $self->userinfo(undef); + } else { + $new = "" unless defined $new; + $new =~ s/%/%25/g; + $new =~ s/;/%3B/g; + $self->userinfo("$new$new_info"); + } + } + + return undef unless defined $old; + $old =~ s/;.*//; + return uri_unescape($old); +} + +sub auth +{ + my $self = shift; + my $old = $self->userinfo; + + if (@_) { + my $new = $old; + $new = "" unless defined $new; + $new =~ s/(^[^;]*)//; + my $user = $1; + $new =~ s/;auth=[^;]*//i; + + + my $auth = shift; + if (defined $auth) { + $auth =~ s/%/%25/g; + $auth =~ s/;/%3B/g; + $new = ";AUTH=$auth$new"; + } + $self->userinfo("$user$new"); + + } + + return undef unless defined $old; + $old =~ s/^[^;]*//; + return uri_unescape($1) if $old =~ /;auth=(.*)/i; + return; +} + +1; diff --git a/lib/URI/rlogin.pm b/lib/URI/rlogin.pm new file mode 100644 index 0000000..5ed141b --- /dev/null +++ b/lib/URI/rlogin.pm @@ -0,0 +1,12 @@ +package URI::rlogin; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +sub default_port { 513 } + +1; diff --git a/lib/URI/rsync.pm b/lib/URI/rsync.pm new file mode 100644 index 0000000..9cb649a --- /dev/null +++ b/lib/URI/rsync.pm @@ -0,0 +1,14 @@ +package URI::rsync; # http://rsync.samba.org/ + +# rsync://[USER@]HOST[:PORT]/SRC + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_server URI::_userpass); + +sub default_port { 873 } + +1; diff --git a/lib/URI/rtsp.pm b/lib/URI/rtsp.pm new file mode 100644 index 0000000..fa82efc --- /dev/null +++ b/lib/URI/rtsp.pm @@ -0,0 +1,12 @@ +package URI::rtsp; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::http'; + +sub default_port { 554 } + +1; diff --git a/lib/URI/rtspu.pm b/lib/URI/rtspu.pm new file mode 100644 index 0000000..b91fb80 --- /dev/null +++ b/lib/URI/rtspu.pm @@ -0,0 +1,12 @@ +package URI::rtspu; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::rtsp'; + +sub default_port { 554 } + +1; diff --git a/lib/URI/sftp.pm b/lib/URI/sftp.pm new file mode 100644 index 0000000..03bf9b3 --- /dev/null +++ b/lib/URI/sftp.pm @@ -0,0 +1,10 @@ +package URI::sftp; + +use strict; +use warnings; + +use parent 'URI::ssh'; + +our $VERSION = "1.69"; + +1; diff --git a/lib/URI/sip.pm b/lib/URI/sip.pm new file mode 100644 index 0000000..7b27a9f --- /dev/null +++ b/lib/URI/sip.pm @@ -0,0 +1,85 @@ +# +# Written by Ryan Kereliuk <ryker@ryker.org>. This file may be +# distributed under the same terms as Perl itself. +# +# The RFC 3261 sip URI is <scheme>:<authority>;<params>?<query>. +# + +package URI::sip; + +use strict; +use warnings; + +use parent qw(URI::_server URI::_userpass); + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub default_port { 5060 } + +sub authority +{ + my $self = shift; + $$self =~ m,^($URI::scheme_re:)?([^;?]*)(.*)$,os or die; + my $old = $2; + + if (@_) { + my $auth = shift; + $$self = defined($1) ? $1 : ""; + my $rest = $3; + if (defined $auth) { + $auth =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego; + $$self .= "$auth"; + } + $$self .= $rest; + } + $old; +} + +sub params_form +{ + my $self = shift; + $$self =~ m,^((?:$URI::scheme_re:)?)(?:([^;?]*))?(;[^?]*)?(.*)$,os or die; + my $paramstr = $3; + + if (@_) { + my @args = @_; + $$self = $1 . $2; + my $rest = $4; + my @new; + for (my $i=0; $i < @args; $i += 2) { + push(@new, "$args[$i]=$args[$i+1]"); + } + $paramstr = join(";", @new); + $$self .= ";" . $paramstr . $rest; + } + $paramstr =~ s/^;//o; + return split(/[;=]/, $paramstr); +} + +sub params +{ + my $self = shift; + $$self =~ m,^((?:$URI::scheme_re:)?)(?:([^;?]*))?(;[^?]*)?(.*)$,os or die; + my $paramstr = $3; + + if (@_) { + my $new = shift; + $$self = $1 . $2; + my $rest = $4; + $$self .= $paramstr . $rest; + } + $paramstr =~ s/^;//o; + return $paramstr; +} + +# Inherited methods that make no sense for a SIP URI. +sub path {} +sub path_query {} +sub path_segments {} +sub abs { shift } +sub rel { shift } +sub query_keywords {} + +1; diff --git a/lib/URI/sips.pm b/lib/URI/sips.pm new file mode 100644 index 0000000..767067f --- /dev/null +++ b/lib/URI/sips.pm @@ -0,0 +1,14 @@ +package URI::sips; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::sip'; + +sub default_port { 5061 } + +sub secure { 1 } + +1; diff --git a/lib/URI/snews.pm b/lib/URI/snews.pm new file mode 100644 index 0000000..de1d515 --- /dev/null +++ b/lib/URI/snews.pm @@ -0,0 +1,14 @@ +package URI::snews; # draft-gilman-news-url-01 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::news'; + +sub default_port { 563 } + +sub secure { 1 } + +1; diff --git a/lib/URI/ssh.pm b/lib/URI/ssh.pm new file mode 100644 index 0000000..396c7a7 --- /dev/null +++ b/lib/URI/ssh.pm @@ -0,0 +1,16 @@ +package URI::ssh; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +# ssh://[USER@]HOST[:PORT]/SRC + +sub default_port { 22 } + +sub secure { 1 } + +1; diff --git a/lib/URI/telnet.pm b/lib/URI/telnet.pm new file mode 100644 index 0000000..b86d9aa --- /dev/null +++ b/lib/URI/telnet.pm @@ -0,0 +1,12 @@ +package URI::telnet; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +sub default_port { 23 } + +1; diff --git a/lib/URI/tn3270.pm b/lib/URI/tn3270.pm new file mode 100644 index 0000000..fb60acc --- /dev/null +++ b/lib/URI/tn3270.pm @@ -0,0 +1,12 @@ +package URI::tn3270; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +sub default_port { 23 } + +1; diff --git a/lib/URI/urn.pm b/lib/URI/urn.pm new file mode 100644 index 0000000..fe744f8 --- /dev/null +++ b/lib/URI/urn.pm @@ -0,0 +1,100 @@ +package URI::urn; # RFC 2141 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI'; + +use Carp qw(carp); + +my %implementor; + +sub _init { + my $class = shift; + my $self = $class->SUPER::_init(@_); + my $nid = $self->nid; + + my $impclass = $implementor{$nid}; + return $impclass->_urn_init($self, $nid) if $impclass; + + $impclass = "URI::urn"; + if ($nid =~ /^[A-Za-z\d][A-Za-z\d\-]*\z/) { + my $id = $nid; + # make it a legal perl identifier + $id =~ s/-/_/g; + $id = "_$id" if $id =~ /^\d/; + + $impclass = "URI::urn::$id"; + no strict 'refs'; + unless (@{"${impclass}::ISA"}) { + # Try to load it + eval "require $impclass"; + die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/; + $impclass = "URI::urn" unless @{"${impclass}::ISA"}; + } + } + else { + carp("Illegal namespace identifier '$nid' for URN '$self'") if $^W; + } + $implementor{$nid} = $impclass; + + return $impclass->_urn_init($self, $nid); +} + +sub _urn_init { + my($class, $self, $nid) = @_; + bless $self, $class; +} + +sub _nid { + my $self = shift; + my $opaque = $self->opaque; + if (@_) { + my $v = $opaque; + my $new = shift; + $v =~ s/[^:]*/$new/; + $self->opaque($v); + # XXX possible rebless + } + $opaque =~ s/:.*//s; + return $opaque; +} + +sub nid { # namespace identifier + my $self = shift; + my $nid = $self->_nid(@_); + $nid = lc($nid) if defined($nid); + return $nid; +} + +sub nss { # namespace specific string + my $self = shift; + my $opaque = $self->opaque; + if (@_) { + my $v = $opaque; + my $new = shift; + if (defined $new) { + $v =~ s/(:|\z).*/:$new/; + } + else { + $v =~ s/:.*//s; + } + $self->opaque($v); + } + return undef unless $opaque =~ s/^[^:]*://; + return $opaque; +} + +sub canonical { + my $self = shift; + my $nid = $self->_nid; + my $new = $self->SUPER::canonical; + return $new if $nid !~ /[A-Z]/ || $nid =~ /%/; + $new = $new->clone if $new == $self; + $new->nid(lc($nid)); + return $new; +} + +1; diff --git a/lib/URI/urn/isbn.pm b/lib/URI/urn/isbn.pm new file mode 100644 index 0000000..b335044 --- /dev/null +++ b/lib/URI/urn/isbn.pm @@ -0,0 +1,103 @@ +package URI::urn::isbn; # RFC 3187 + +use strict; +use warnings; + +use parent 'URI::urn'; + +use Carp qw(carp); + +BEGIN { + require Business::ISBN; + + local $^W = 0; # don't warn about dev versions, perl5.004 style + warn "Using Business::ISBN version " . Business::ISBN->VERSION . + " which is deprecated.\nUpgrade to Business::ISBN version 2\n" + if Business::ISBN->VERSION < 2; + } + +sub _isbn { + my $nss = shift; + $nss = $nss->nss if ref($nss); + my $isbn = Business::ISBN->new($nss); + $isbn = undef if $isbn && !$isbn->is_valid; + return $isbn; +} + +sub _nss_isbn { + my $self = shift; + my $nss = $self->nss(@_); + my $isbn = _isbn($nss); + $isbn = $isbn->as_string if $isbn; + return($nss, $isbn); +} + +sub isbn { + my $self = shift; + my $isbn; + (undef, $isbn) = $self->_nss_isbn(@_); + return $isbn; +} + +sub isbn_publisher_code { + my $isbn = shift->_isbn || return undef; + return $isbn->publisher_code; +} + +BEGIN { +my $group_method = do { + local $^W = 0; # don't warn about dev versions, perl5.004 style + Business::ISBN->VERSION >= 2 ? 'group_code' : 'country_code'; + }; + +sub isbn_group_code { + my $isbn = shift->_isbn || return undef; + return $isbn->$group_method; +} +} + +sub isbn_country_code { + my $name = (caller(0))[3]; $name =~ s/.*:://; + carp "$name is DEPRECATED. Use isbn_group_code instead"; + + no strict 'refs'; + &isbn_group_code; +} + +BEGIN { +my $isbn13_method = do { + local $^W = 0; # don't warn about dev versions, perl5.004 style + Business::ISBN->VERSION >= 2 ? 'as_isbn13' : 'as_ean'; + }; + +sub isbn13 { + my $isbn = shift->_isbn || return undef; + + # Business::ISBN 1.x didn't put hyphens in the EAN, and it was just a string + # Business::ISBN 2.0 doesn't do EAN, but it does ISBN-13 objects + # and it uses the hyphens, so call as_string with an empty anon array + # or, adjust the test and features to say that it comes out with hyphens. + my $thingy = $isbn->$isbn13_method; + return eval { $thingy->can( 'as_string' ) } ? $thingy->as_string([]) : $thingy; +} +} + +sub isbn_as_ean { + my $name = (caller(0))[3]; $name =~ s/.*:://; + carp "$name is DEPRECATED. Use isbn13 instead"; + + no strict 'refs'; + &isbn13; +} + +sub canonical { + my $self = shift; + my($nss, $isbn) = $self->_nss_isbn; + my $new = $self->SUPER::canonical; + return $new unless $nss && $isbn && $nss ne $isbn; + $new = $new->clone if $new == $self; + $new->nss($isbn); + return $new; +} + +1; diff --git a/lib/URI/urn/oid.pm b/lib/URI/urn/oid.pm new file mode 100644 index 0000000..ceb8322 --- /dev/null +++ b/lib/URI/urn/oid.pm @@ -0,0 +1,20 @@ +package URI::urn::oid; # RFC 2061 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::urn'; + +sub oid { + my $self = shift; + my $old = $self->nss; + if (@_) { + $self->nss(join(".", @_)); + } + return split(/\./, $old) if wantarray; + return $old; +} + +1; @@ -0,0 +1,173 @@ +use strict; +use warnings; + +print "1..45\n"; + +# This test the resolution of abs path for all examples given +# in the "Uniform Resource Identifiers (URI): Generic Syntax" document. + +use URI; +my $base = "http://a/b/c/d;p?q"; +my $testno = 1; +my @rel_fail; + +while (<DATA>) { + #next if 1 .. /^C\.\s+/; + #last if /^D\.\s+/; + next unless /\s+(\S+)\s*=\s*(.*)/; + my $uref = $1; + my $expect = $2; + $expect =~ s/\(current document\)/$base/; + #print "$uref => $expect\n"; + + my $bad; + my $u = URI->new($uref, $base); + if ($u->abs($base)->as_string ne $expect) { + $bad++; + my $abs = $u->abs($base)->as_string; + print qq(URI->new("$uref")->abs("$base") ==> "$abs"\n); + } + + # Let's test another version of the same thing + $u = URI->new($uref); + my $b = URI->new($base); + if ($u->abs($b,1) ne $expect && $uref !~ /^http:/) { + $bad++; + print qq(URI->new("$uref")->abs(URI->new("$base"), 1)\n); + } + + # Let's try the other way + $u = URI->new($expect)->rel($base)->as_string; + if ($u ne $uref) { + push(@rel_fail, qq($testno: URI->new("$expect", "$base")->rel ==> "$u" (not "$uref")\n)); + } + + print "not " if $bad; + print "ok ", $testno++, "\n"; +} + +if (@rel_fail) { + print "\n\nIn the following cases we did not get back to where we started with rel()\n"; + print @rel_fail; +} + + + +__END__ + +Network Working Group T. Berners-Lee, MIT/LCS +INTERNET-DRAFT R. Fielding, U.C. Irvine +draft-fielding-uri-syntax-02 L. Masinter, Xerox Corporation +Expires six months after publication date March 4, 1998 + + + Uniform Resource Identifiers (URI): Generic Syntax + +[...] + +C. Examples of Resolving Relative URI References + + Within an object with a well-defined base URI of + + http://a/b/c/d;p?q + + the relative URIs would be resolved as follows: + +C.1. Normal Examples + + g:h = g:h + g = http://a/b/c/g + ./g = http://a/b/c/g + g/ = http://a/b/c/g/ + /g = http://a/g + //g = http://g + ?y = http://a/b/c/d;p?y + g?y = http://a/b/c/g?y + #s = (current document)#s + g#s = http://a/b/c/g#s + g?y#s = http://a/b/c/g?y#s + ;x = http://a/b/c/;x + g;x = http://a/b/c/g;x + g;x?y#s = http://a/b/c/g;x?y#s + . = http://a/b/c/ + ./ = http://a/b/c/ + .. = http://a/b/ + ../ = http://a/b/ + ../g = http://a/b/g + ../.. = http://a/ + ../../ = http://a/ + ../../g = http://a/g + +C.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur in + normal practice, all URI parsers should be capable of resolving them + consistently. Each example uses the same base as above. + + An empty reference refers to the start of the current document. + + <> = (current document) + + Parsers must be careful in handling the case where there are more + relative path ".." segments than there are hierarchical levels in + the base URI's path. Note that the ".." syntax cannot be used to + change the authority component of a URI. + + ../../../g = http://a/../g + ../../../../g = http://a/../../g + + In practice, some implementations strip leading relative symbolic + elements (".", "..") after applying a relative URI calculation, based + on the theory that compensating for obvious author errors is better + than allowing the request to fail. Thus, the above two references + will be interpreted as "http://a/g" by some implementations. + + Similarly, parsers must avoid treating "." and ".." as special when + they are not complete components of a relative path. + + /./g = http://a/./g + /../g = http://a/../g + g. = http://a/b/c/g. + .g = http://a/b/c/.g + g.. = http://a/b/c/g.. + ..g = http://a/b/c/..g + + Less likely are cases where the relative URI uses unnecessary or + nonsensical forms of the "." and ".." complete path segments. + + ./../g = http://a/b/g + ./g/. = http://a/b/c/g/ + g/./h = http://a/b/c/g/h + g/../h = http://a/b/c/h + g;x=1/./y = http://a/b/c/g;x=1/y + g;x=1/../y = http://a/b/c/y + + All client applications remove the query component from the base URI + before resolving relative URIs. However, some applications fail to + separate the reference's query and/or fragment components from a + relative path before merging it with the base path. This error is + rarely noticed, since typical usage of a fragment never includes the + hierarchy ("/") character, and the query component is not normally + used within relative references. + + g?y/./x = http://a/b/c/g?y/./x + g?y/../x = http://a/b/c/g?y/../x + g#s/./x = http://a/b/c/g#s/./x + g#s/../x = http://a/b/c/g#s/../x + + Some parsers allow the scheme name to be present in a relative URI + if it is the same as the base URI scheme. This is considered to be + a loophole in prior specifications of partial URIs [RFC1630]. Its + use should be avoided. + + http:g = http:g + http: = http: + + +========================================================================== + +Some extra tests for good measure... + + #foo? = (current document)#foo? + ?#foo = http://a/b/c/d;p?#foo + diff --git a/t/clone.t b/t/clone.t new file mode 100644 index 0000000..57201f5 --- /dev/null +++ b/t/clone.t @@ -0,0 +1,21 @@ +use strict; +use warnings; + +print "1..2\n"; + +use URI::URL; + +my $b = URI::URL->new("http://www/"); + +my $u1 = URI::URL->new("foo", $b); +my $u2 = $u1->clone; + +$u1->base("http://yyy/"); + +#use Data::Dump; Data::Dump::dump($b, $u1, $u2); + +print "not " unless $u1->abs->as_string eq "http://yyy/foo"; +print "ok 1\n"; + +print "not " unless $u2->abs->as_string eq "http://www/foo"; +print "ok 2\n"; @@ -0,0 +1,15 @@ +#!perl -T + +use strict; +use warnings; + +use Test::More; + +plan tests => 1; + +use URI::file; +$ENV{PATH} = "/bin:/usr/bin"; + +my $cwd = eval { URI::file->cwd }; +is($@, '', 'no exceptions'); + diff --git a/t/data.t b/t/data.t new file mode 100644 index 0000000..64920d9 --- /dev/null +++ b/t/data.t @@ -0,0 +1,111 @@ +use strict; +use warnings; + +eval { + require MIME::Base64; +}; +if ($@) { + print "1..0\n"; + print $@; + exit; +} + +print "1..22\n"; + +use URI; + +my $u = URI->new("data:,A%20brief%20note"); +print "not " unless $u->scheme eq "data" && $u->opaque eq ",A%20brief%20note"; +print "ok 1\n"; + +print "not " unless $u->media_type eq "text/plain;charset=US-ASCII" && + $u->data eq "A brief note"; +print "ok 2\n"; + +my $old = $u->data("Får-i-kål er tingen!"); +print "not " unless $old eq "A brief note" && $u eq "data:,F%E5r-i-k%E5l%20er%20tingen!"; +print "ok 3\n"; + +$old = $u->media_type("text/plain;charset=iso-8859-1"); +print "not " unless $old eq "text/plain;charset=US-ASCII" && + $u eq "data:text/plain;charset=iso-8859-1,F%E5r-i-k%E5l%20er%20tingen!"; +print "ok 4\n"; + + +$u = URI->new("data:image/gif;base64,R0lGODdhMAAwAPAAAAAAAP///ywAAAAAMAAwAAAC8IyPqcvt3wCcDkiLc7C0qwyGHhSWpjQu5yqmCYsapyuvUUlvONmOZtfzgFzByTB10QgxOR0TqBQejhRNzOfkVJ+5YiUqrXF5Y5lKh/DeuNcP5yLWGsEbtLiOSpa/TPg7JpJHxyendzWTBfX0cxOnKPjgBzi4diinWGdkF8kjdfnycQZXZeYGejmJlZeGl9i2icVqaNVailT6F5iJ90m6mvuTS4OK05M0vDk0Q4XUtwvKOzrcd3iq9uisF81M1OIcR7lEewwcLp7tuNNkM3uNna3F2JQFo97Vriy/Xl4/f1cf5VWzXyym7PHhhx4dbgYKAAA7"); + +print "not " unless $u->media_type eq "image/gif"; +print "ok 5\n"; + +if ($ENV{DISPLAY} && $ENV{XV}) { + open(XV, "| $ENV{XV} -") || die; + print XV $u->data; + close(XV); +} +print "not " unless length($u->data) == 273; +print "ok 6\n"; + +$u = URI->new("data:text/plain;charset=iso-8859-7,%be%fg%be"); # %fg +print "not " unless $u->data eq "\xBE%fg\xBE"; +print "ok 7\n"; + +$u = URI->new("data:application/vnd-xxx-query,select_vcount,fcol_from_fieldtable/local"); +print "not " unless $u->data eq "select_vcount,fcol_from_fieldtable/local"; +print "ok 8\n"; +$u->data(""); +print "not " unless $u eq "data:application/vnd-xxx-query,"; +print "ok 9\n"; + +$u->data("a,b"); $u->media_type(undef); +print "not " unless $u eq "data:,a,b"; +print "ok 10\n"; + +# Test automatic selection of URI/BASE64 encoding +$u = URI->new("data:"); +$u->data(""); +print "not " unless $u eq "data:,"; +print "ok 11\n"; + +$u->data(">"); +print "not " unless $u eq "data:,%3E" && $u->data eq ">"; +print "ok 12\n"; + +$u->data(">>>>>"); +print "not " unless $u eq "data:,%3E%3E%3E%3E%3E"; +print "ok 13\n"; + +$u->data(">>>>>>"); +print "not " unless $u eq "data:;base64,Pj4+Pj4+"; +print "ok 14\n"; + +$u->media_type("text/plain;foo=bar"); +print "not " unless $u eq "data:text/plain;foo=bar;base64,Pj4+Pj4+"; +print "ok 15\n"; + +$u->media_type("foo"); +print "not " unless $u eq "data:foo;base64,Pj4+Pj4+"; +print "ok 16\n"; + +$u->data(">" x 3000); +print "not " unless $u eq ("data:foo;base64," . ("Pj4+" x 1000)) && + $u->data eq (">" x 3000); +print "ok 17\n"; + +$u->media_type(undef); +$u->data(undef); +print "not " unless $u eq "data:,"; +print "ok 18\n"; + +$u = URI->new("data:foo"); +print "not " unless $u->media_type("bar,båz") eq "foo"; +print "ok 19\n"; + +print "not " unless $u->media_type eq "bar,båz"; +print "ok 20\n"; + +$old = $u->data("new"); +print "not " unless $old eq "" && $u eq "data:bar%2Cb%E5z,new"; +print "ok 21\n"; + +print "not " unless URI->new('data:;base64,%51%6D%70%76%5A%58%4A%75')->data eq "Bjoern"; +print "ok 22\n"; diff --git a/t/distmanifest.t b/t/distmanifest.t new file mode 100644 index 0000000..c2812f7 --- /dev/null +++ b/t/distmanifest.t @@ -0,0 +1,11 @@ +use strict; +use warnings; + +use Test::More; +BEGIN { + plan skip_all => 'these tests are for authors only!' + unless -d '.git' || $ENV{AUTHOR_TESTING}; +} + +use Test::DistManifest; +manifest_ok(); diff --git a/t/escape-char.t b/t/escape-char.t new file mode 100644 index 0000000..b03e43d --- /dev/null +++ b/t/escape-char.t @@ -0,0 +1,29 @@ +use strict; +use warnings; + +# see https://rt.cpan.org/Ticket/Display.html?id=96941 + +use Test::More; +use URI; + +TODO: { + my $str = "http://foo/\xE9"; + utf8::upgrade($str); + my $uri = URI->new($str); + + local $TODO = 'URI::Escape::escape_char misunderstands utf8'; + + # http://foo/%C3%A9 + is("$uri", 'http://foo/%E9', 'correctly created a URI from a utf8-upgraded string'); +} + +{ + my $str = "http://foo/\xE9"; + utf8::downgrade($str); + my $uri = URI->new($str); + + # http://foo/%E9 + is("$uri", 'http://foo/%E9', 'correctly created a URI from a utf8-downgrade string'); +} + +done_testing; diff --git a/t/escape.t b/t/escape.t new file mode 100644 index 0000000..05b8022 --- /dev/null +++ b/t/escape.t @@ -0,0 +1,37 @@ +use strict; +use warnings; + +use Test::More tests => 12; + +use URI::Escape; + +is uri_escape("|abcå"), "%7Cabc%E5"; + +is uri_escape("abc", "b-d"), "a%62%63"; + +# New escapes in RFC 3986 +is uri_escape("~*'()"), "~%2A%27%28%29"; +is uri_escape("<\">"), "%3C%22%3E"; + +is uri_escape(undef), undef; + +is uri_unescape("%7Cabc%e5"), "|abcå"; + +is_deeply [uri_unescape("%40A%42", "CDE", "F%47H")], [qw(@AB CDE FGH)]; + + +use URI::Escape qw(%escapes); + +is $escapes{"%"}, "%25"; + + +use URI::Escape qw(uri_escape_utf8); + +is uri_escape_utf8("|abcå"), "%7Cabc%C3%A5"; + +skip "Perl 5.8.0 or higher required", 3 if $] < 5.008; + +ok !eval { print uri_escape("abc" . chr(300)); 1 }; +like $@, qr/^Can\'t escape \\x\{012C\}, try uri_escape_utf8\(\) instead/; + +is uri_escape_utf8(chr(0xFFF)), "%E0%BF%BF"; diff --git a/t/file.t b/t/file.t new file mode 100644 index 0000000..26e0119 --- /dev/null +++ b/t/file.t @@ -0,0 +1,65 @@ +#!perl -T + +use strict; +use warnings; + +use URI::file; + +my @tests = ( +[ "file", "unix", "win32", "mac" ], +#---------------- ------------ --------------- -------------- +[ "file://localhost/foo/bar", + "!/foo/bar", "!\\foo\\bar", "!foo:bar", ], +[ "file:///foo/bar", + "/foo/bar", "\\foo\\bar", "!foo:bar", ], +[ "file:/foo/bar", "!/foo/bar", "!\\foo\\bar", "foo:bar", ], +[ "foo/bar", "foo/bar", "foo\\bar", ":foo:bar",], +[ "file://foo3445x/bar","!//foo3445x/bar", "!\\\\foo3445x\\bar", "!foo3445x:bar"], +[ "file://a:/", "!//a:/", "!A:\\", undef], +[ "file:///A:/", "/A:/", "A:\\", undef], +[ "file:///", "/", "\\", undef], +[ ".", ".", ".", ":"], +[ "..", "..", "..", "::"], +[ "%2E", "!.", "!.", ":."], +[ "../%2E%2E", "!../..", "!..\\..", "::.."], +); + +my @os = @{shift @tests}; +shift @os; # file + +my $num = @tests; +print "1..$num\n"; + +my $testno = 1; + +for my $t (@tests) { + my @t = @$t; + my $file = shift @t; + my $err; + + my $u = URI->new($file, "file"); + my $i = 0; + for my $os (@os) { + my $f = $u->file($os); + my $expect = $t[$i]; + $f = "<undef>" unless defined $f; + $expect = "<undef>" unless defined $expect; + my $loose; + $loose++ if $expect =~ s/^!//; + if ($expect ne $f) { + print "URI->new('$file', 'file')->file('$os') ne $expect, but $f\n"; + $err++; + } + if (defined($t[$i]) && !$loose) { + my $u2 = URI::file->new($t[$i], $os); + unless ($u2->as_string eq $file) { + print "URI::file->new('$t[$i]', '$os') ne $file, but $u2\n"; + $err++; + } + } + $i++; + } + print "not " if $err; + print "ok $testno\n"; + $testno++; +} @@ -0,0 +1,53 @@ +use strict; +use warnings; + +print "1..13\n"; + +use URI; +my $uri; + +$uri = URI->new("ftp://ftp.example.com/path"); + +print "not " unless $uri->scheme eq "ftp"; +print "ok 1\n"; + +print "not " unless $uri->host eq "ftp.example.com"; +print "ok 2\n"; + +print "not " unless $uri->port eq 21; +print "ok 3\n"; + +print "not " unless $uri->user eq "anonymous"; +print "ok 4\n"; + +print "not " unless $uri->password eq 'anonymous@'; +print "ok 5\n"; + +$uri->userinfo("gisle\@aas.no"); + +print "not " unless $uri eq "ftp://gisle%40aas.no\@ftp.example.com/path"; +print "ok 6\n"; + +print "not " unless $uri->user eq "gisle\@aas.no"; +print "ok 7\n"; + +print "not " if defined($uri->password); +print "ok 8\n"; + +$uri->password("secret"); + +print "not " unless $uri eq "ftp://gisle%40aas.no:secret\@ftp.example.com/path"; +print "ok 9\n"; + +$uri = URI->new("ftp://gisle\@aas.no:secret\@ftp.example.com/path"); +print "not " unless $uri eq "ftp://gisle\@aas.no:secret\@ftp.example.com/path"; +print "ok 10\n"; + +print "not " unless $uri->userinfo eq "gisle\@aas.no:secret"; +print "ok 11\n"; + +print "not " unless $uri->user eq "gisle\@aas.no"; +print "ok 12\n"; + +print "not " unless $uri->password eq "secret"; +print "ok 13\n"; diff --git a/t/generic.t b/t/generic.t new file mode 100644 index 0000000..e2f7b97 --- /dev/null +++ b/t/generic.t @@ -0,0 +1,219 @@ +use strict; +use warnings; + +print "1..48\n"; + +use URI; + +my $foo = URI->new("Foo:opaque#frag"); + +print "not " unless ref($foo) eq "URI::_foreign"; +print "ok 1\n"; + +print "not " unless $foo->as_string eq "Foo:opaque#frag"; +print "ok 2\n"; + +print "not " unless "$foo" eq "Foo:opaque#frag"; +print "ok 3\n"; + +# Try accessors +print "not " unless $foo->_scheme eq "Foo" && $foo->scheme eq "foo" && !$foo->has_recognized_scheme; +print "ok 4\n"; + +print "not " unless $foo->opaque eq "opaque"; +print "ok 5\n"; + +print "not " unless $foo->fragment eq "frag"; +print "ok 6\n"; + +print "not " unless $foo->canonical eq "foo:opaque#frag"; +print "ok 7\n"; + +# Try modificators +my $old = $foo->scheme("bar"); + +print "not " unless $old eq "foo" && $foo eq "bar:opaque#frag"; +print "ok 8\n"; + +$old = $foo->scheme(""); +print "not " unless $old eq "bar" && $foo eq "opaque#frag"; +print "ok 9\n"; + +$old = $foo->scheme("foo"); +$old = $foo->scheme(undef); + +print "not " unless $old eq "foo" && $foo eq "opaque#frag"; +print "ok 10\n"; + +$foo->scheme("foo"); + + +$old = $foo->opaque("xxx"); +print "not " unless $old eq "opaque" && $foo eq "foo:xxx#frag"; +print "ok 11\n"; + +$old = $foo->opaque(""); +print "not " unless $old eq "xxx" && $foo eq "foo:#frag"; +print "ok 12\n"; + +$old = $foo->opaque(" #?/"); +$old = $foo->opaque(undef); +print "not " unless $old eq "%20%23?/" && $foo eq "foo:#frag"; +print "ok 13\n"; + +$foo->opaque("opaque"); + + +$old = $foo->fragment("x"); +print "not " unless $old eq "frag" && $foo eq "foo:opaque#x"; +print "ok 14\n"; + +$old = $foo->fragment(""); +print "not " unless $old eq "x" && $foo eq "foo:opaque#"; +print "ok 15\n"; + +$old = $foo->fragment(undef); +print "not " unless $old eq "" && $foo eq "foo:opaque"; +print "ok 16\n"; + + +# Compare +print "not " unless $foo->eq("Foo:opaque") && + $foo->eq(URI->new("FOO:opaque")) && + $foo->eq("foo:opaque"); +print "ok 17\n"; + +print "not " if $foo->eq("Bar:opaque") || + $foo->eq("foo:opaque#"); +print "ok 18\n"; + + +# Try hierarchal unknown URLs + +$foo = URI->new("foo://host:80/path?query#frag"); + +print "not " unless "$foo" eq "foo://host:80/path?query#frag"; +print "ok 19\n"; + +# Accessors +print "not " unless $foo->scheme eq "foo"; +print "ok 20\n"; + +print "not " unless $foo->authority eq "host:80"; +print "ok 21\n"; + +print "not " unless $foo->path eq "/path"; +print "ok 22\n"; + +print "not " unless $foo->query eq "query"; +print "ok 23\n"; + +print "not " unless $foo->fragment eq "frag"; +print "ok 24\n"; + +# Modificators +$old = $foo->authority("xxx"); +print "not " unless $old eq "host:80" && $foo eq "foo://xxx/path?query#frag"; +print "ok 25\n"; + +$old = $foo->authority(""); +print "not " unless $old eq "xxx" && $foo eq "foo:///path?query#frag"; +print "ok 26\n"; + +$old = $foo->authority(undef); +print "not " unless $old eq "" && $foo eq "foo:/path?query#frag"; +print "ok 27\n"; + +$old = $foo->authority("/? #;@&"); +print "not " unless !defined($old) && $foo eq "foo://%2F%3F%20%23;@&/path?query#frag"; +print "ok 28\n"; + +$old = $foo->authority("host:80"); +print "not " unless $old eq "%2F%3F%20%23;@&" && $foo eq "foo://host:80/path?query#frag"; +print "ok 29\n"; + + +$old = $foo->path("/foo"); +print "not " unless $old eq "/path" && $foo eq "foo://host:80/foo?query#frag"; +print "ok 30\n"; + +$old = $foo->path("bar"); +print "not " unless $old eq "/foo" && $foo eq "foo://host:80/bar?query#frag"; +print "ok 31\n"; + +$old = $foo->path(""); +print "not " unless $old eq "/bar" && $foo eq "foo://host:80?query#frag"; +print "ok 32\n"; + +$old = $foo->path(undef); +print "not " unless $old eq "" && $foo eq "foo://host:80?query#frag"; +print "ok 33\n"; + +$old = $foo->path("@;/?#"); +print "not " unless $old eq "" && $foo eq "foo://host:80/@;/%3F%23?query#frag"; +print "ok 34\n"; + +$old = $foo->path("path"); +print "not " unless $old eq "/@;/%3F%23" && $foo eq "foo://host:80/path?query#frag"; +print "ok 35\n"; + + +$old = $foo->query("foo"); +print "not " unless $old eq "query" && $foo eq "foo://host:80/path?foo#frag"; +print "ok 36\n"; + +$old = $foo->query(""); +print "not " unless $old eq "foo" && $foo eq "foo://host:80/path?#frag"; +print "ok 37\n"; + +$old = $foo->query(undef); +print "not " unless $old eq "" && $foo eq "foo://host:80/path#frag"; +print "ok 38\n"; + +$old = $foo->query("/?&=# "); +print "not " unless !defined($old) && $foo eq "foo://host:80/path?/?&=%23%20#frag"; +print "ok 39\n"; + +$old = $foo->query("query"); +print "not " unless $old eq "/?&=%23%20" && $foo eq "foo://host:80/path?query#frag"; +print "ok 40\n"; + +# Some buildup trics +$foo = URI->new(""); +$foo->path("path"); +$foo->authority("auth"); + +print "not " unless $foo eq "//auth/path"; +print "ok 41\n"; + +$foo = URI->new("", "http:"); +$foo->query("query"); +$foo->authority("auth"); +print "not " unless $foo eq "//auth?query" && $foo->has_recognized_scheme; +print "ok 42\n"; + +$foo->path("path"); +print "not " unless $foo eq "//auth/path?query"; +print "ok 43\n"; + +$foo = URI->new(""); +$old = $foo->path("foo"); +print "not " unless $old eq "" && $foo eq "foo" && !$foo->has_recognized_scheme; +print "ok 44\n"; + +$old = $foo->path("bar"); +print "not " unless $old eq "foo" && $foo eq "bar"; +print "ok 45\n"; + +$old = $foo->opaque("foo"); +print "not " unless $old eq "bar" && $foo eq "foo"; +print "ok 46\n"; + +$old = $foo->path(""); +print "not " unless $old eq "foo" && $foo eq ""; +print "ok 47\n"; + +$old = $foo->query("q"); +print "not " unless !defined($old) && $foo eq "?q"; +print "ok 48\n"; + diff --git a/t/gopher.t b/t/gopher.t new file mode 100644 index 0000000..427a5fc --- /dev/null +++ b/t/gopher.t @@ -0,0 +1,46 @@ +use strict; +use warnings; + +print "1..48\n"; + +use URI; + +my $t = 1; +sub is { + my ($exp, $got) = @_; + if (!defined $exp) { + print "not " if defined $got; + } + else { + print "not " unless $got eq $exp; + } + print "ok " . ($t++) . "\n"; +} + +sub check_gopher_uri { + my ($u, $exphost, $expport, $exptype, $expselector, $expsearch) = @_; + is("gopher", $u->scheme); + is($exphost, $u->host); + is($expport, $u->port); + is($exptype, $u->gopher_type); + is($expselector, $u->selector); + is($expsearch, $u->search); +} + +my $u; +$u = URI->new("gopher://host"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70/"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70/1"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70/1"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:123/7foo"); +check_gopher_uri($u, "host", 123, 7, "foo"); +$u = URI->new("gopher://host/7foo\tbar%20baz"); +check_gopher_uri($u, "host", 70, 7, "foo", "bar baz"); +$u = URI->new("gopher://host/7foo%09bar%20baz"); +check_gopher_uri($u, "host", 70, 7, "foo", "bar baz"); diff --git a/t/heuristic.t b/t/heuristic.t new file mode 100644 index 0000000..63c2ad8 --- /dev/null +++ b/t/heuristic.t @@ -0,0 +1,138 @@ +use strict; +use warnings; + +BEGIN { + # mock up a gethostbyname that always works :-) + *CORE::GLOBAL::gethostbyname = sub { + my $name = shift; + #print "# gethostbyname [$name]\n"; + die if wantarray; + return 1 if $name =~ /^www\.perl\.(com|org|ca|su)\.$/; + return 1 if $name eq "www.perl.co.uk\."; + return 0; + }; +} + +print "1..26\n"; + +use URI::Heuristic qw(uf_urlstr uf_url); +if (shift) { + $URI::Heuristic::DEBUG++; + open(STDERR, ">&STDOUT"); # redirect STDERR +} + +print "not " unless uf_urlstr("http://www.sn.no/") eq "http://www.sn.no/"; +print "ok 1\n"; + +if ($^O eq "MacOS") { + print "not " unless uf_urlstr("etc:passwd") eq "file:/etc/passwd"; +} else { +print "not " unless uf_urlstr("/etc/passwd") eq "file:/etc/passwd"; +} +print "ok 2\n"; + +if ($^O eq "MacOS") { + print "not " unless uf_urlstr(":foo.txt") eq "file:./foo.txt"; +} else { +print "not " unless uf_urlstr("./foo.txt") eq "file:./foo.txt"; +} +print "ok 3\n"; + +print "not " unless uf_urlstr("ftp.aas.no/lwp.tar.gz") eq "ftp://ftp.aas.no/lwp.tar.gz"; +print "ok 4\n"; + +if($^O eq "MacOS") { +# its a weird, but valid, MacOS path, so it can't be left alone + print "not " unless uf_urlstr("C:\\CONFIG.SYS") eq "file:/C/%5CCONFIG.SYS"; +} else { +print "not " unless uf_urlstr("C:\\CONFIG.SYS") eq "file:C:\\CONFIG.SYS"; +} +print "ok 5\n"; + +{ + local $ENV{LC_ALL} = ""; + local $ENV{LANG} = ""; + local $ENV{HTTP_ACCEPT_LANGUAGE} = ""; + + $ENV{LC_ALL} = "en_GB.UTF-8"; + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(org|co)\.uk/camel\.gif$,; + print "ok 6\n"; + + use Net::Domain; + $ENV{LC_ALL} = "C"; + { no warnings; *Net::Domain::hostfqdn = sub { return 'vasya.su' } } + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.su/camel\.gif$,; + print "ok 7\n"; + + $ENV{LC_ALL} = "C"; + { no warnings; *Net::Domain::hostfqdn = sub { return '' } } + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(com|org)/camel\.gif$,; + print "ok 8\n"; + + $ENV{HTTP_ACCEPT_LANGUAGE} = "en-ca"; + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") eq "http://www.perl.ca/camel.gif"; + print "ok 9\n"; +} + +$URI::Heuristic::MY_COUNTRY = "bv"; +print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(com|org)/camel\.gif$,; +print "ok 10\n"; + +# Backwards compatibility; uk != United Kingdom in ISO 3166 +$URI::Heuristic::MY_COUNTRY = "uk"; +print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(org|co)\.uk/camel\.gif$,; +print "ok 11\n"; + +$URI::Heuristic::MY_COUNTRY = "gb"; +print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(org|co)\.uk/camel\.gif$,; +print "ok 12\n"; + +$ENV{URL_GUESS_PATTERN} = "www.ACME.org www.ACME.com"; +print "not " unless uf_urlstr("perl") eq "http://www.perl.org"; +print "ok 13\n"; + +{ + local $ENV{URL_GUESS_PATTERN} = ""; + print "not " unless uf_urlstr("perl") eq "http://perl"; + print "ok 14\n"; + + print "not " unless uf_urlstr("http:80") eq "http:80"; + print "ok 15\n"; + + print "not " unless uf_urlstr("mailto:gisle\@aas.no") eq "mailto:gisle\@aas.no"; + print "ok 16\n"; + + print "not " unless uf_urlstr("gisle\@aas.no") eq "mailto:gisle\@aas.no"; + print "ok 17\n"; + + print "not " unless uf_urlstr("Gisle.Aas\@aas.perl.org") eq "mailto:Gisle.Aas\@aas.perl.org"; + print "ok 18\n"; + + print "not " unless uf_url("gopher.sn.no")->scheme eq "gopher"; + print "ok 19\n"; + + print "not " unless uf_urlstr("123.3.3.3:8080/foo") eq "http://123.3.3.3:8080/foo"; + print "ok 20\n"; + + print "not " unless uf_urlstr("123.3.3.3:443/foo") eq "https://123.3.3.3:443/foo"; + print "ok 21\n"; + + print "not " unless uf_urlstr("123.3.3.3:21/foo") eq "ftp://123.3.3.3:21/foo"; + print "ok 22\n"; + + print "not " unless uf_url("FTP.example.com")->scheme eq "ftp"; + print "ok 23\n"; + + print "not " unless uf_url("ftp2.example.com")->scheme eq "ftp"; + print "ok 24\n"; + + print "not " unless uf_url("ftp")->scheme eq "ftp"; + print "ok 25\n"; + + print "not " unless uf_url("https.example.com")->scheme eq "https"; + print "ok 26\n"; +} diff --git a/t/http.t b/t/http.t new file mode 100644 index 0000000..fb30124 --- /dev/null +++ b/t/http.t @@ -0,0 +1,66 @@ +use strict; +use warnings; + +print "1..16\n"; + +use URI; + +my $u = URI->new("<http://www.perl.com/path?q=fôo>"); + +#print "$u\n"; +print "not " unless $u eq "http://www.perl.com/path?q=f%F4o"; +print "ok 1\n"; + +print "not " unless $u->port == 80; +print "ok 2\n"; + +# play with port +my $old = $u->port(8080); +print "not " unless $old == 80 && $u eq "http://www.perl.com:8080/path?q=f%F4o"; +print "ok 3\n"; + +$u->port(80); +print "not " unless $u eq "http://www.perl.com:80/path?q=f%F4o"; +print "ok 4\n"; + +$u->port(""); +print "not " unless $u eq "http://www.perl.com:/path?q=f%F4o" && $u->port == 80; +print "ok 5\n"; + +$u->port(undef); +print "not " unless $u eq "http://www.perl.com/path?q=f%F4o"; +print "ok 6\n"; + +my @q = $u->query_form; +print "not " unless @q == 2 && "@q" eq "q fôo"; +print "ok 7\n"; + +$u->query_form(foo => "bar", bar => "baz"); +print "not " unless $u->query eq "foo=bar&bar=baz"; +print "ok 8\n"; + +print "not " unless $u->host eq "www.perl.com"; +print "ok 9\n"; + +print "not " unless $u->path eq "/path"; +print "ok 10\n"; + +print "not " if $u->secure; +print "ok 11\n"; + +$u->scheme("https"); +print "not " unless $u->port == 443; +print "ok 12\n"; + +print "not " unless $u eq "https://www.perl.com/path?foo=bar&bar=baz"; +print "ok 13\n"; + +print "not " unless $u->secure; +print "ok 14\n"; + +$u = URI->new("http://%77%77%77%2e%70%65%72%6c%2e%63%6f%6d/%70%75%62/%61/%32%30%30%31/%30%38/%32%37/%62%6a%6f%72%6e%73%74%61%64%2e%68%74%6d%6c"); +print "not " unless $u->canonical eq "http://www.perl.com/pub/a/2001/08/27/bjornstad.html"; +print "ok 15\n"; + +print "not " unless $u->has_recognized_scheme; +print "ok 16\n"; diff --git a/t/idna.t b/t/idna.t new file mode 100644 index 0000000..da2ad98 --- /dev/null +++ b/t/idna.t @@ -0,0 +1,14 @@ +use strict; +use warnings; + +use utf8; +use Test::More tests => 7; +use URI::_idna; + +is URI::_idna::encode("www.example.com"), "www.example.com"; +is URI::_idna::decode("www.example.com"), "www.example.com"; +is URI::_idna::encode("www.example.com."), "www.example.com."; +is URI::_idna::decode("www.example.com."), "www.example.com."; +is URI::_idna::encode("Bücher.ch"), "xn--bcher-kva.ch"; +is URI::_idna::decode("xn--bcher-kva.ch"), "bücher.ch"; +is URI::_idna::decode("xn--bcher-KVA.ch"), "bücher.ch"; @@ -0,0 +1,76 @@ +use strict; +use warnings; + +use utf8; +use Test::More; +use Config; + +if (defined $Config{useperlio}) { + plan tests=>26; +} else { + plan skip_all=>'this perl doesn\'t support PerlIO layers'; +} + +use URI; +use URI::IRI; + +my $u; + +binmode Test::More->builder->output, ':encoding(UTF-8)'; +binmode Test::More->builder->failure_output, ':encoding(UTF-8)'; + +$u = URI->new("http://Bücher.ch"); +is $u, "http://xn--bcher-kva.ch"; +is $u->host, "xn--bcher-kva.ch"; +is $u->ihost, "bücher.ch"; +is $u->as_iri, "http://bücher.ch"; + +$u = URI->new("http://example.com/Bücher"); +is $u, "http://example.com/B%C3%BCcher"; +is $u->as_iri, "http://example.com/Bücher"; + +$u = URI->new("http://example.com/B%FCcher"); # latin1 encoded stuff +is $u->as_iri, "http://example.com/B%FCcher"; # ...should not be decoded + +$u = URI->new("http://example.com/B\xFCcher"); +is $u->as_string, "http://example.com/B%FCcher"; +is $u->as_iri, "http://example.com/B%FCcher"; + +$u = URI::IRI->new("http://example.com/B\xFCcher"); +is $u->as_string, "http://example.com/Bücher"; +is $u->as_iri, "http://example.com/Bücher"; + +# draft-duerst-iri-bis.txt claims this should map to xn--rsum-bad.example.org +$u = URI->new("http://r\xE9sum\xE9.example.org"); +is $u->as_string, "http://xn--rsum-bpad.example.org"; + +$u = URI->new("http://xn--rsum-bad.example.org"); +is $u->as_iri, "http://r\x80sum\x80.example.org"; + +$u = URI->new("http://r%C3%A9sum%C3%A9.example.org"); +is $u->as_string, "http://r%C3%A9sum%C3%A9.example.org"; +is $u->as_iri, "http://r\xE9sum\xE9.example.org"; + +$u = URI->new("http://âž¡.ws/"); +is $u, "http://xn--hgi.ws/"; +is $u->host, "xn--hgi.ws"; +is $u->ihost, "âž¡.ws"; +is $u->as_iri, "http://âž¡.ws/"; + +# draft-duerst-iri-bis.txt examples (section 3.7.1): +is(URI->new("http://www.example.org/D%C3%BCrst")->as_iri, "http://www.example.org/D\xFCrst"); +is(URI->new("http://www.example.org/D%FCrst")->as_iri, "http://www.example.org/D%FCrst"); +TODO: { + local $TODO = "some chars (like U+202E, RIGHT-TO-LEFT OVERRIDE) need to stay escaped"; +is(URI->new("http://xn--99zt52a.example.org/%e2%80%ae")->as_iri, "http://\x{7D0D}\x{8C46}.example.org/%e2%80%ae"); +} + +# try some URLs that can't be IDNA encoded (fallback to encoded UTF8 bytes) +$u = URI->new("http://" . ("ü" x 128)); +is $u, "http://" . ("%C3%BC" x 128); +is $u->host, ("\xC3\xBC" x 128); +TODO: { + local $TODO = "should ihost decode UTF8 bytes?"; + is $u->ihost, ("ü" x 128); +} +is $u->as_iri, "http://" . ("ü" x 128); diff --git a/t/ldap.t b/t/ldap.t new file mode 100644 index 0000000..3cd3dd8 --- /dev/null +++ b/t/ldap.t @@ -0,0 +1,119 @@ +use strict; +use warnings; + +print "1..24\n"; + +use URI; + +my $uri; + +$uri = URI->new("ldap://host/dn=base?cn,sn?sub?objectClass=*"); + +print "not " unless $uri->host eq "host"; +print "ok 1\n"; + +print "not " unless $uri->dn eq "dn=base"; +print "ok 2\n"; + +print "not " unless join("-",$uri->attributes) eq "cn-sn"; +print "ok 3\n"; + +print "not " unless $uri->scope eq "sub"; +print "ok 4\n"; + +print "not " unless $uri->filter eq "objectClass=*"; +print "ok 5\n"; + +$uri = URI->new("ldap:"); +$uri->dn("o=University of Michigan,c=US"); + +print "not " unless "$uri" eq "ldap:o=University%20of%20Michigan,c=US" && + $uri->dn eq "o=University of Michigan,c=US"; +print "ok 6\n"; + +$uri->host("ldap.itd.umich.edu"); +print "not " unless $uri->as_string eq "ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US"; +print "ok 7\n"; + +# check defaults +print "not " unless $uri->_scope eq "" && + $uri->scope eq "base" && + $uri->_filter eq "" && + $uri->filter eq "(objectClass=*)"; +print "ok 8\n"; + +# attribute +$uri->attributes("postalAddress"); +print "not " unless $uri eq "ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US?postalAddress"; +print "ok 9\n"; + +# does attribute escapeing work as it should +$uri->attributes($uri->attributes, "foo", ",", "*", "?", "#", "\0"); + +print "not " unless $uri->attributes eq "postalAddress,foo,%2C,*,%3F,%23,%00" && + join("-", $uri->attributes) eq "postalAddress-foo-,-*-?-#-\0"; +print "ok 10\n"; +$uri->attributes(""); + +$uri->scope("sub?#"); +print "not " unless $uri->query eq "?sub%3F%23" && + $uri->scope eq "sub?#"; +print "ok 11\n"; +$uri->scope(""); + +$uri->filter("f=?,#"); +print "not " unless $uri->query eq "??f=%3F,%23" && + $uri->filter eq "f=?,#"; + +$uri->filter("(int=\\00\\00\\00\\04)"); +print "not " unless $uri->query eq "??(int=%5C00%5C00%5C00%5C04)"; +print "ok 12\n"; + + +print "ok 13\n"; +$uri->filter(""); + +$uri->extensions("!bindname" => "cn=Manager,co=Foo"); +my %ext = $uri->extensions; + +print "not " unless $uri->query eq "???!bindname=cn=Manager%2Cco=Foo" && + keys %ext == 1 && + $ext{"!bindname"} eq "cn=Manager,co=Foo"; +print "ok 14\n"; + +$uri = URI->new("ldap://LDAP-HOST:389/o=University%20of%20Michigan,c=US?postalAddress?base?ObjectClass=*?FOO=Bar,bindname=CN%3DManager%CO%3dFoo"); + +print "not " unless $uri->canonical eq "ldap://ldap-host/o=University%20of%20Michigan,c=US?postaladdress???foo=Bar,bindname=CN=Manager%CO=Foo"; +print "ok 15\n"; + +print "$uri\n"; +print $uri->canonical, "\n"; + +print "not " if $uri->secure; +print "ok 16\n"; + +$uri = URI->new("ldaps://host/dn=base?cn,sn?sub?objectClass=*"); + +print "not " unless $uri->host eq "host"; +print "ok 17\n"; +print "not " unless $uri->port eq 636; +print "ok 18\n"; +print "not " unless $uri->dn eq "dn=base"; +print "ok 19\n"; +print "not " unless $uri->secure; +print "ok 20\n"; + +$uri = URI->new("ldapi://%2Ftmp%2Fldap.sock/????x-mod=-w--w----"); +print "not " unless $uri->authority eq "%2Ftmp%2Fldap.sock"; +print "ok 21\n"; +print "not " unless $uri->un_path eq "/tmp/ldap.sock"; +print "ok 22\n"; + +$uri->un_path("/var/x\@foo:bar/"); +print "not " unless $uri eq "ldapi://%2Fvar%2Fx%40foo%3Abar%2F/????x-mod=-w--w----"; +print "ok 23\n"; + +%ext = $uri->extensions; +print "not " unless $ext{"x-mod"} eq "-w--w----"; +print "ok 24\n"; + diff --git a/t/mailto.t b/t/mailto.t new file mode 100644 index 0000000..f13a1f8 --- /dev/null +++ b/t/mailto.t @@ -0,0 +1,48 @@ +use strict; +use warnings; + +print "1..7\n"; + +use URI; + +my $u = URI->new('mailto:gisle@aas.no'); + +print "not " unless $u->to eq 'gisle@aas.no' && + $u eq 'mailto:gisle@aas.no'; +print "ok 1\n"; + +my $old = $u->to('larry@wall.org'); +print "not " unless $old eq 'gisle@aas.no' && + $u->to eq 'larry@wall.org' && + $u eq 'mailto:larry@wall.org'; +print "ok 2\n"; + +$u->to("?/#"); +print "not " unless $u->to eq "?/#" && + $u eq 'mailto:%3F/%23'; +print "ok 3\n"; + +my @h = $u->headers; +print "not " unless @h == 2 && "@h" eq "to ?/#"; +print "ok 4\n"; + +$u->headers(to => 'gisle@aas.no', + cc => 'gisle@ActiveState.com,larry@wall.org', + Subject => 'How do you do?', + garbage => '/;?#=&', +); + +@h = $u->headers; +print "not " unless $u->to eq 'gisle@aas.no' && + @h == 8 && + "@h" eq 'to gisle@aas.no cc gisle@ActiveState.com,larry@wall.org Subject How do you do? garbage /;?#=&'; +print "ok 5\n"; + +#print "$u\n"; +print "not " unless $u eq 'mailto:gisle@aas.no?cc=gisle%40ActiveState.com%2Clarry%40wall.org&Subject=How+do+you+do%3F&garbage=%2F%3B%3F%23%3D%26'; +print "ok 6\n"; + +$u = URI->new("mailto:"); +$u->to("gisle"); +print "not " unless $u eq 'mailto:gisle'; +print "ok 7\n"; @@ -0,0 +1,80 @@ +use strict; +use warnings; + +print "1..6\n"; + +# Test mixing of URI and URI::WithBase objects +use URI; +use URI::WithBase; +use URI::URL; + +my $str = "http://www.sn.no/"; +my $rel = "path/img.gif"; + +my $u = URI->new($str); +my $uw = URI::WithBase->new($str, "http:"); +my $uu = URI::URL->new($str); + +my $a = URI->new($rel, $u); +my $b = URI->new($rel, $uw); +my $c = URI->new($rel, $uu); +my $d = URI->new($rel, $str); + +sub Dump +{ + require Data::Dumper; + print Data::Dumper->Dump([$a, $b, $c, $d], [qw(a b c d)]); +} + +#Dump(); +print "not " unless $a->isa("URI") && + ref($b) eq ref($uw) && + ref($c) eq ref($uu) && + $d->isa("URI"); +print "ok 1\n"; + +print "not " if $b->base && $c->base; +print "ok 2\n"; + +$a = URI::URL->new($rel, $u); +$b = URI::URL->new($rel, $uw); +$c = URI::URL->new($rel, $uu); +$d = URI::URL->new($rel, $str); + +print "not " unless ref($a) eq "URI::URL" && + ref($b) eq "URI::URL" && + ref($c) eq "URI::URL" && + ref($d) eq "URI::URL"; +print "ok 3\n"; + +print "not " unless ref($b->base) eq ref($uw) && + $b->base eq $uw && + ref($c->base) eq ref($uu) && + $c->base eq $uu && + $d->base eq $str; +print "ok 4\n"; + + + +$a = URI->new($uu, $u); +$b = URI->new($uu, $uw); +$c = URI->new($uu, $uu); +$d = URI->new($uu, $str); + +#Dump(); +print "not " unless ref($a) eq ref($b) && + ref($b) eq ref($c) && + ref($c) eq ref($d) && + ref($d) eq ref($u); +print "ok 5\n"; + +$a = URI::URL->new($u, $u); +$b = URI::URL->new($u, $uw); +$c = URI::URL->new($u, $uu); +$d = URI::URL->new($u, $str); + +print "not " unless ref($a) eq "URI::URL" && + ref($b) eq "URI::URL" && + ref($c) eq "URI::URL" && + ref($d) eq "URI::URL"; +print "ok 6\n"; @@ -0,0 +1,38 @@ +use strict; +use warnings; + +print "1..8\n"; + +use URI; + +my $u = URI->new("<mms://66.250.188.13/KFOG_FM>"); + +#print "$u\n"; +print "not " unless $u eq "mms://66.250.188.13/KFOG_FM"; +print "ok 1\n"; + +print "not " unless $u->port == 1755; +print "ok 2\n"; + +# play with port +my $old = $u->port(8755); +print "not " unless $old == 1755 && $u eq "mms://66.250.188.13:8755/KFOG_FM"; +print "ok 3\n"; + +$u->port(1755); +print "not " unless $u eq "mms://66.250.188.13:1755/KFOG_FM"; +print "ok 4\n"; + +$u->port(""); +print "not " unless $u eq "mms://66.250.188.13:/KFOG_FM" && $u->port == 1755; +print "ok 5\n"; + +$u->port(undef); +print "not " unless $u eq "mms://66.250.188.13/KFOG_FM"; +print "ok 6\n"; + +print "not " unless $u->host eq "66.250.188.13"; +print "ok 7\n"; + +print "not " unless $u->path eq "/KFOG_FM"; +print "ok 8\n"; diff --git a/t/news.t b/t/news.t new file mode 100644 index 0000000..a009a9e --- /dev/null +++ b/t/news.t @@ -0,0 +1,51 @@ +use strict; +use warnings; + +print "1..7\n"; + +use URI; + +my $u = URI->new("news:comp.lang.perl.misc"); + +print "not " unless $u->group eq "comp.lang.perl.misc" && + !defined($u->message) && + $u->port == 119 && + $u eq "news:comp.lang.perl.misc"; +print "ok 1\n"; + + +$u->host("news.online.no"); +print "not " unless $u->group eq "comp.lang.perl.misc" && + $u->port == 119 && + $u eq "news://news.online.no/comp.lang.perl.misc"; +print "ok 2\n"; + +$u->group("no.perl", 1 => 10); +print "not " unless $u eq "news://news.online.no/no.perl/1-10"; +print "ok 3\n"; + +my @g = $u->group; +#print "G: @g\n"; +print "not " unless @g == 3 && "@g" eq "no.perl 1 10"; +print "ok 4\n"; + +$u->message('42@g.aas.no'); +#print "$u\n"; +print "not " unless $u->message eq '42@g.aas.no' && + !defined($u->group) && + $u eq 'news://news.online.no/42@g.aas.no'; +print "ok 5\n"; + + +$u = URI->new("nntp:no.perl"); +print "not " unless $u->group eq "no.perl" && + $u->port == 119; +print "ok 6\n"; + +$u = URI->new("snews://snews.online.no/no.perl"); + +print "not " unless $u->group eq "no.perl" && + $u->host eq "snews.online.no" && + $u->port == 563; +print "ok 7\n"; + diff --git a/t/num_eq.t b/t/num_eq.t new file mode 100644 index 0000000..066d84c --- /dev/null +++ b/t/num_eq.t @@ -0,0 +1,16 @@ +# Test URI's overloading of numeric comparison for checking object +# equality + +use strict; +use warnings; +use Test::More 'no_plan'; + +use URI; + +my $uri1 = URI->new("http://foo.com"); +my $uri2 = URI->new("http://foo.com"); + +# cmp_ok() has a bug/misfeature where it strips overloading +# before doing the comparison. So use a regular ok(). +ok $uri1 == $uri1, "=="; +ok $uri1 != $uri2, "!="; diff --git a/t/old-absconf.t b/t/old-absconf.t new file mode 100644 index 0000000..536f4d7 --- /dev/null +++ b/t/old-absconf.t @@ -0,0 +1,38 @@ +use strict; +use warnings; + +print "1..6\n"; + +use URI::URL qw(url); + +# Test configuration via some global variables. + +$URI::URL::ABS_REMOTE_LEADING_DOTS = 1; +$URI::URL::ABS_ALLOW_RELATIVE_SCHEME = 1; + +my $u1 = url("../../../../abc", "http://web/a/b"); + +print "not " unless $u1->abs->as_string eq "http://web/abc"; +print "ok 1\n"; + +{ + local $URI::URL::ABS_REMOTE_LEADING_DOTS; + print "not " unless $u1->abs->as_string eq "http://web/../../../abc"; + print "ok 2\n"; +} + + +$u1 = url("http:../../../../abc", "http://web/a/b"); +print "not " unless $u1->abs->as_string eq "http://web/abc"; +print "ok 3\n"; + +{ + local $URI::URL::ABS_ALLOW_RELATIVE_SCHEME; + print "not " unless $u1->abs->as_string eq "http:../../../../abc"; + print "ok 4\n"; + print "not " unless $u1->abs(undef,1)->as_string eq "http://web/abc"; + print "ok 5\n"; +} + +print "not " unless $u1->abs(undef,0)->as_string eq "http:../../../../abc"; +print "ok 6\n"; diff --git a/t/old-base.t b/t/old-base.t new file mode 100644 index 0000000..77b562b --- /dev/null +++ b/t/old-base.t @@ -0,0 +1,978 @@ +use strict; +use warnings; + +use Test::More; +use URI::URL qw(url); +use URI::Escape qw(uri_escape uri_unescape); +use File::Temp 'tempdir'; + +# want compatibility +use URI::file; +$URI::file::DEFAULT_AUTHORITY = undef; + + +package main; + +# Must ensure that there is no relative paths in @INC because we will +# chdir in the newlocal tests. +unless ($^O eq "MacOS") { +chomp(my $pwd = ($^O =~ /mswin32/i ? `cd` : $^O eq 'VMS' ? `show default` : `pwd`)); +if ($^O eq 'VMS') { + $pwd =~ s#^\s+##; + $pwd = VMS::Filespec::unixpath($pwd); + $pwd =~ s#/$##; +} +for (@INC) { + my $x = $_; + $x = VMS::Filespec::unixpath($x) if $^O eq 'VMS'; + next if $x =~ m|^/| or $^O =~ /os2|mswin32/i + and $x =~ m#^(\w:[\\/]|[\\/]{2})#; + note "Turn lib path $x into $pwd/$x\n"; + $_ = "$pwd/$x"; + +} +} + +$| = 1; + +# Do basic tests first. + +note "Self tests for URI::URL version $URI::URL::VERSION...\n"; + +subtest 'scheme tests' => \&scheme_parse_test; + +subtest 'parts test' => \&parts_test; + +subtest 'escape test' => \&escape_test; + +subtest 'newlocal test' => \&newlocal_test; + +subtest 'Test relative/absolute URI::URL parsing' => \&absolute_test; + +subtest 'eq test' => \&eq_test; + +# Let's test making our own things +URI::URL::strict(0); +# This should work after URI::URL::strict(0) +my $url = new URI::URL "x-myscheme:something"; +# Since no implementor is registered for 'x-myscheme' then it will +# be handled by the URI::URL::_generic class +is($url->as_string, 'x-myscheme:something', ref($url) . '->as_string'); +is($url->path, 'something', ref($url) . '->path'); +URI::URL::strict(1); + +=comment + +# Let's try to make our URL subclass +{ + package MyURL; + @ISA = URI::URL::implementor(); + + sub _parse { + my($self, $init) = @_; + $self->URI::URL::_generic::_parse($init, qw(netloc path)); + } + + sub foo { + my $self = shift; + print ref($self)."->foo called for $self\n"; + } +} +# Let's say that it implements the 'x-a+b.c' scheme (alias 'x-foo') +URI::URL::implementor('x-a+b.c', 'MyURL'); +URI::URL::implementor('x-foo', 'MyURL'); + +# Now we are ready to try our new URL scheme +$url = new URI::URL 'x-a+b.c://foo/bar;a?b'; +is($url->as_string, 'x-a+b.c://foo/bar;a?b', ref($url) . '->as_string'); +is($url->path, '/bar;a?b', ref($url) . '->path'); +$url->foo; +$newurl = new URI::URL 'xxx', $url; +$newurl->foo; +$url = new URI::URL 'yyy', 'x-foo:'; +$url->foo; + +=cut + +# Test the new wash&go constructor +is(url("../foo.html", "http://www.sn.no/a/b")->abs->as_string, + 'http://www.sn.no/foo.html', 'wash&go'); + +note "URI::URL version $URI::URL::VERSION ok\n"; + +done_testing; +exit 0; + + + + +##################################################################### +# +# scheme_parse_test() +# +# test parsing and retrieval methods + +sub scheme_parse_test { + + my $tests = { + 'hTTp://web1.net/a/b/c/welcome#intro' + => { 'scheme'=>'http', 'host'=>'web1.net', 'port'=>80, + 'path'=>'/a/b/c/welcome', 'frag'=>'intro','query'=>undef, + 'epath'=>'/a/b/c/welcome', 'equery'=>undef, + 'params'=>undef, 'eparams'=>undef, + 'as_string'=>'http://web1.net/a/b/c/welcome#intro', + 'full_path' => '/a/b/c/welcome' }, + + 'http://web:1/a?query+text' + => { 'scheme'=>'http', 'host'=>'web', 'port'=>1, + 'path'=>'/a', 'frag'=>undef, 'query'=>'query+text' }, + + 'http://web.net/' + => { 'scheme'=>'http', 'host'=>'web.net', 'port'=>80, + 'path'=>'/', 'frag'=>undef, 'query'=>undef, + 'full_path' => '/', + 'as_string' => 'http://web.net/' }, + + 'http://web.net' + => { 'scheme'=>'http', 'host'=>'web.net', 'port'=>80, + 'path'=>'/', 'frag'=>undef, 'query'=>undef, + 'full_path' => '/', + 'as_string' => 'http://web.net/' }, + + 'http:0' + => { 'scheme'=>'http', 'path'=>'0', 'query'=>undef, + 'as_string'=>'http:0', 'full_path'=>'0', }, + + 'http:/0?0' + => { 'scheme'=>'http', 'path'=>'/0', 'query'=>'0', + 'as_string'=>'http:/0?0', 'full_path'=>'/0?0', }, + + 'http://0:0/0/0;0?0#0' + => { 'scheme'=>'http', 'host'=>'0', 'port'=>'0', + 'path' => '/0/0', 'query'=>'0', 'params'=>'0', + 'netloc'=>'0:0', + 'frag'=>0, 'as_string'=>'http://0:0/0/0;0?0#0' }, + + 'ftp://0%3A:%40@h:0/0?0' + => { 'scheme'=>'ftp', 'user'=>'0:', 'password'=>'@', + 'host'=>'h', 'port'=>'0', 'path'=>'/0?0', + 'query'=>'0', params=>undef, + 'netloc'=>'0%3A:%40@h:0', + 'as_string'=>'ftp://0%3A:%40@h:0/0?0' }, + + 'ftp://usr:pswd@web:1234/a/b;type=i' + => { 'host'=>'web', 'port'=>1234, 'path'=>'/a/b', + 'user'=>'usr', 'password'=>'pswd', + 'params'=>'type=i', + 'as_string'=>'ftp://usr:pswd@web:1234/a/b;type=i' }, + + 'ftp://host/a/b' + => { 'host'=>'host', 'port'=>21, 'path'=>'/a/b', + 'user'=>'anonymous', + 'as_string'=>'ftp://host/a/b' }, + + 'file://host/fseg/fs?g/fseg' + # don't escape ? for file: scheme + => { 'host'=>'host', 'path'=>'/fseg/fs', + 'as_string'=>'file://host/fseg/fs?g/fseg' }, + + 'gopher://host' + => { 'gtype'=>'1', 'as_string' => 'gopher://host', }, + + 'gopher://host/' + => { 'gtype'=>'1', 'as_string' => 'gopher://host/', }, + + 'gopher://gopher/2a_selector' + => { 'gtype'=>'2', 'selector'=>'a_selector', + 'as_string' => 'gopher://gopher/2a_selector', }, + + 'mailto:libwww-perl@ics.uci.edu' + => { 'address' => 'libwww-perl@ics.uci.edu', + 'encoded822addr'=> 'libwww-perl@ics.uci.edu', +# 'user' => 'libwww-perl', +# 'host' => 'ics.uci.edu', + 'as_string' => 'mailto:libwww-perl@ics.uci.edu', }, + + 'news:*' + => { 'groupart'=>'*', 'group'=>'*', as_string=>'news:*' }, + 'news:comp.lang.perl' + => { 'group'=>'comp.lang.perl' }, + 'news:perl-faq/module-list-1-794455075@ig.co.uk' + => { 'article'=> + 'perl-faq/module-list-1-794455075@ig.co.uk' }, + + 'nntp://news.com/comp.lang.perl/42' + => { 'group'=>'comp.lang.perl', }, #'digits'=>42 }, + + 'telnet://usr:pswd@web:12345/' + => { 'user'=>'usr', 'password'=>'pswd', 'host'=>'web' }, + 'rlogin://aas@a.sn.no' + => { 'user'=>'aas', 'host'=>'a.sn.no' }, +# 'tn3270://aas@ibm' +# => { 'user'=>'aas', 'host'=>'ibm', +# 'as_string'=>'tn3270://aas@ibm/'}, + +# 'wais://web.net/db' +# => { 'database'=>'db' }, +# 'wais://web.net/db?query' +# => { 'database'=>'db', 'query'=>'query' }, +# 'wais://usr:pswd@web.net/db/wt/wp' +# => { 'database'=>'db', 'wtype'=>'wt', 'wpath'=>'wp', +# 'password'=>'pswd' }, + }; + + foreach my $url_str (sort keys %$tests ){ + note "Testing '$url_str'\n"; + my $url = new URI::URL $url_str; + my $tests = $tests->{$url_str}; + while( my ($method, $exp) = each %$tests ){ + is($url->$method, $exp, ref($url) . "->$method"); + } + } +} + + +##################################################################### +# +# parts_test() (calls netloc_test test) +# +# Test individual component part access functions +# +sub parts_test { + + # test storage part access/edit methods (netloc, user, password, + # host and port are tested by &netloc_test) + + $url = new URI::URL 'file://web/orig/path'; + $url->scheme('http'); + $url->path('1info'); + $url->query('key words'); + $url->frag('this'); + is($url->as_string, 'http://web/1info?key%20words#this', ref($url) . '->as_string'); + + $url->epath('%2f/%2f'); + $url->equery('a=%26'); + is($url->full_path, '/%2f/%2f?a=%26', ref($url) . '->full_path'); + + # At this point it should be impossible to access the members path() + # and query() without complaints. + eval { my $p = $url->path; note "Path is $p\n"; }; + fail "Path exception failed" unless $@; + eval { my $p = $url->query; note "Query is $p\n"; }; + fail "Query exception failed" unless $@; + + # but we should still be able to set it + $url->path("howdy"); + is($url->as_string, 'http://web/howdy?a=%26#this', ref($url) . '->as_string'); + + # Test the path_components function + $url = new URI::URL 'file:%2f/%2f'; + my $p; + $p = join('-', $url->path_components); + fail "\$url->path_components returns '$p', expected '/-/'" + unless $p eq "/-/"; + $url->host("localhost"); + $p = join('-', $url->path_components); + fail "\$url->path_components returns '$p', expected '-/-/'" + unless $p eq "-/-/"; + $url->epath("/foo/bar/"); + $p = join('-', $url->path_components); + fail "\$url->path_components returns '$p', expected '-foo-bar-'" + unless $p eq "-foo-bar-"; + $url->path_components("", "/etc", "\0", "..", "øse", ""); + is($url->full_path, '/%2Fetc/%00/../%F8se/', ref($url) . '->full_path'); + + # Setting undef + $url = new URI::URL 'http://web/p;p?q#f'; + $url->epath(undef); + $url->equery(undef); + $url->eparams(undef); + $url->frag(undef); + is($url->as_string, 'http://web', ref($url) . '->as_string'); + + # Test http query access methods + $url->keywords('dog'); + is($url->as_string, 'http://web?dog', ref($url) . '->as_string'); + $url->keywords(qw(dog bones)); + is($url->as_string, 'http://web?dog+bones', ref($url) . '->as_string'); + $url->keywords(0,0); + is($url->as_string, 'http://web?0+0', ref($url) . '->as_string'); + $url->keywords('dog', 'bones', '#+='); + is($url->as_string, 'http://web?dog+bones+%23%2B%3D', ref($url) . '->as_string'); + $a = join(":", $url->keywords); + is($a, 'dog:bones:#+=', "\$url->keywords"); + # calling query_form is an error +# eval { my $foo = $url->query_form; }; +# fail "\$url->query_form should croak since query contains keywords not a form." +# unless $@; + + $url->query_form(a => 'foo', b => 'bar'); + is($url->as_string, 'http://web?a=foo&b=bar', ref($url) . '->as_string'); + my %a = $url->query_form; + is_deeply( + \%a, + { a => 'foo', b => 'bar' }, + "\$url->query_form", + ); + + $url->query_form(a => undef, a => 'foo', '&=' => '&=+'); + is($url->as_string, 'http://web?a=&a=foo&%26%3D=%26%3D%2B', ref($url) . '->as_string'); + + my @a = $url->query_form; + is(scalar(@a), 6, 'length'); + is_deeply( + \@a, + [ + 'a', '', + 'a', 'foo', + '&=', '&=+', + ], + 'query_form', + ); + + # calling keywords is an error +# eval { my $foo = $url->keywords; }; +# die "\$url->keywords should croak when query is a form" +# unless $@; + # Try this odd one + $url->equery('&=&=b&a=&a&a=b=c&&a=b'); + @a = $url->query_form; + #note join(":", @a), "\n"; + is(scalar(@a), 16, 'length'); + ok( + $a[4] eq "" && $a[5] eq "b" && $a[10] eq "a" && $a[11] eq "b=c", + 'sequence', + ); + + # Try array ref values in the key value pairs + $url->query_form(a => ['foo', 'bar'], b => 'foo', c => ['bar', 'foo']); + is($url->as_string, 'http://web?a=foo&a=bar&b=foo&c=bar&c=foo', ref($url) . '->as_string'); + + subtest 'netloc_test' => \&netloc_test; + subtest 'port_test' => \&port_test; + + $url->query(undef); + is($url->query, undef, ref($url) . '->as_string'); + + $url = new URI::URL 'gopher://gopher/'; + $url->port(33); + $url->gtype("3"); + $url->selector("S"); + $url->search("query"); + is($url->as_string, 'gopher://gopher:33/3S%09query', ref($url) . '->as_string'); + + $url->epath("45%09a"); + is($url->gtype, '4', ref($url) . '->as_string'); + is($url->selector, '5', ref($url) . '->as_string'); + is($url->search, 'a', ref($url) . '->as_string'); + is($url->string, undef, ref($url) . '->as_string'); + is($url->path, "/45\ta", ref($url) . '->as_string'); +# $url->path("00\t%09gisle"); +# is($url->search '%09gisle', ref($url) . '->search'); + + # Let's test som other URL schemes + $url = new URI::URL 'news:'; + $url->group("comp.lang.perl.misc"); + is($url->as_string, 'news:comp.lang.perl.misc', ref($url) . '->as_string'); + $url->article('<1234@a.sn.no>'); + is($url->as_string, 'news:1234@a.sn.no', ref($url) . '->as_string: "<" and ">" are gone'); + + # This one should be illegal + eval { $url->article("no.perl"); }; + die "This one should really complain" unless $@; + +# $url = new URI::URL 'mailto:'; +# $url->user("aas"); +# $url->host("a.sn.no"); +# is($url->as_string, 'mailto:aas@a.sn.no', ref($url) . '->as_string'); +# $url->address('foo@bar'); +# is($url->host, 'bar', ref($url) . '->as_string'); +# is($url->user, 'foo', ref($url) . '->as_string'); + +# $url = new URI::URL 'wais://host/database/wt/wpath'; +# $url->database('foo'); +# is($url->as_string, 'wais://host/foo/wt/wpath', ref($url) . '->as_string'); +# $url->wtype('bar'); +# is($url->as_string, 'wais://host/foo/bar/wpath', ref($url) . '->as_string'); + + # Test crack method for various URLs + my(@crack, $crack); + @crack = URI::URL->new("http://host/path;param?query#frag")->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "http*UNDEF*UNDEF*host*80*/path*param*query*frag", 'crack result'); + + @crack = URI::URL->new("foo/bar", "ftp://aas\@ftp.sn.no/")->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; +# die "Bad crack result" unless +# $crack eq "ftp*UNDEF*UNDEF*UNDEF*21*foo/bar*UNDEF*UNDEF*UNDEF"; + + @crack = URI::URL->new('ftp://u:p@host/q?path')->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "ftp*u*p*host*21*/q?path*UNDEF*path*UNDEF", 'crack result'); + + @crack = URI::URL->new("ftp://ftp.sn.no/pub")->crack; # Test anon ftp + is(scalar(@crack), 9, '9 elements'); + ok($crack[2], "passwd in anonymous crack"); + $crack[2] = 'passwd'; # easier to test when we know what it is + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "ftp*anonymous*passwd*ftp.sn.no*21*/pub*UNDEF*UNDEF*UNDEF", 'crack result'); + + @crack = URI::URL->new('mailto:aas@sn.no')->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; +# die "Bad crack result" unless +# $crack eq "mailto*aas*UNDEF*sn.no*UNDEF*aas\@sn.no*UNDEF*UNDEF*UNDEF"; + + @crack = URI::URL->new('news:comp.lang.perl.misc')->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "news*UNDEF*UNDEF*UNDEF*119*comp.lang.perl.misc*UNDEF*UNDEF*UNDEF", 'crack result'); +} + +# +# netloc_test() +# +# Test automatic netloc synchronisation +# +sub netloc_test { + + my $url = new URI::URL 'ftp://anonymous:p%61ss@håst:12345'; + is($url->user, 'anonymous', ref($url) . '->as_string'); + is($url->password, 'pass', ref($url) . '->as_string'); + is($url->host, 'xn--hst-ula', ref($url) . '->as_string'); + is($url->port, 12345, ref($url) . '->as_string'); + # Can't really know how netloc is represented since it is partially escaped + #is($url->netloc, 'anonymous:pass@hst:12345', ref($url) . '->as_string'); + is($url->as_string, 'ftp://anonymous:pass@xn--hst-ula:12345', ref($url) . '->as_string'); + + # The '0' is sometimes tricky to get right + $url->user(0); + $url->password(0); + $url->host(0); + $url->port(0); + is($url->netloc, '0:0@0:0', ref($url) . '->as_string'); + $url->host(undef); + is($url->netloc, '0:0@:0', ref($url) . '->as_string'); + $url->host('h'); + $url->user(undef); + is($url->netloc, ':0@h:0', ref($url) . '->as_string'); + $url->user(''); + is($url->netloc, ':0@h:0', ref($url) . '->as_string'); + $url->password(''); + is($url->netloc, ':@h:0', ref($url) . '->as_string'); + $url->user('foo'); + is($url->netloc, 'foo:@h:0', ref($url) . '->as_string'); + + # Let's try a simple one + $url->user('nemo'); + $url->password('p2'); + $url->host('hst2'); + $url->port(2); + is($url->netloc, 'nemo:p2@hst2:2', ref($url) . '->as_string'); + + $url->user(undef); + $url->password(undef); + $url->port(undef); + is($url->netloc, 'hst2', ref($url) . '->as_string'); + is($url->port, '21', ref($url) . '->as_string'); # the default ftp port + + $url->port(21); + is($url->netloc, 'hst2:21', ref($url) . '->as_string'); + + # Let's try some reserved chars + $url->user("@"); + $url->password(":-#-;-/-?"); + is($url->as_string, 'ftp://%40::-%23-;-%2F-%3F@hst2:21', ref($url) . '->as_string'); + +} + +# +# port_test() +# +# Test port behaviour +# +sub port_test { + $url = URI::URL->new('http://foo/root/dir/'); + my $port = $url->port; + is($port, 80, 'port'); + is($url->as_string, 'http://foo/root/dir/', 'string'); + + $url->port(8001); + $port = $url->port; + is($port, 8001, 'port'); + is($url->as_string, 'http://foo:8001/root/dir/', 'string'); + + $url->port(80); + $port = $url->port; + is($port, 80, 'port'); + is($url->canonical->as_string, 'http://foo/root/dir/', 'string'); + + $url->port(8001); + $url->port(undef); + $port = $url->port; + is($port, 80, 'port'); + is($url->canonical->as_string, 'http://foo/root/dir/', 'string'); +} + + +##################################################################### +# +# escape_test() +# +# escaping functions + +sub escape_test { + # supply escaped URL + $url = new URI::URL 'http://web/this%20has%20spaces'; + # check component is unescaped + is($url->path, '/this has spaces', ref($url) . '->as_string'); + + # modify the unescaped form + $url->path('this ALSO has spaces'); + # check whole url is escaped + is($url->as_string, + 'http://web/this%20ALSO%20has%20spaces', ref($url) . '->as_string'); + + $url = new URI::URL uri_escape('http://web/try %?#" those'); + is($url->as_string, + 'http%3A%2F%2Fweb%2Ftry%20%25%3F%23%22%20those', ref($url) . '->as_string'); + + my $all = pack('C*',0..255); + my $esc = uri_escape($all); + my $new = uri_unescape($esc); + is($all, $new, "uri_escape->uri_unescape"), + + $url->path($all); + is($url->full_path, q(%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23$%&'()*+,-./0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF), ref($url) . '->as_string'); + + # test escaping uses uppercase (preferred by rfc1837) + $url = new URI::URL 'file://h/'; + $url->path(chr(0x7F)); + is($url->as_string, 'file://h/%7F', ref($url) . '->as_string'); + + return; + # reserved characters differ per scheme + + ## XXX is this '?' allowed to be unescaped + $url = new URI::URL 'file://h/test?ing'; + is($url->path, '/test?ing', ref($url) . '->as_string'); + + $url = new URI::URL 'file://h/'; + $url->epath('question?mark'); + is($url->as_string, 'file://h/question?mark', ref($url) . '->as_string'); + # XXX Why should this be any different??? + # Perhaps we should not expect too much :-) + $url->path('question?mark'); + is($url->as_string, 'file://h/question%3Fmark', ref($url) . '->as_string'); + + # See what happens when set different elements to this ugly sting + my $reserved = ';/?:@&=#%'; + $url->path($reserved . "foo"); + is($url->as_string, 'file://h/%3B/%3F%3A%40%26%3D%23%25foo', ref($url) . '->as_string'); + + $url->scheme('http'); + $url->path(''); + is($url->as_string, 'http://h/', ref($url) . '->as_string'); + $url->query($reserved); + $url->params($reserved); + $url->frag($reserved); + is($url->as_string, 'http://h/;%3B%2F%3F%3A%40&=%23%25?%3B%2F%3F%3A%40&=%23%25#;/?:@&=#%', ref($url) . '->as_string'); + + my $str = $url->as_string; + $url = new URI::URL $str; + die "URL changed" if $str ne $url->as_string; + + $url = new URI::URL 'ftp:foo'; + $url->user($reserved); + $url->host($reserved); + is($url->as_string, 'ftp://%3B%2F%3F%3A%40%26%3D%23%25@%3B%2F%3F%3A%40%26%3D%23%25/foo', ref($url) . '->as_string'); + +} + + +##################################################################### +# +# newlocal_test() +# + +sub newlocal_test { + return 1 if $^O eq "MacOS"; + + my $isMSWin32 = ($^O =~ /MSWin32/i); + my $pwd = ($isMSWin32 ? 'cd' : + ($^O eq 'qnx' ? '/usr/bin/fullpath -t' : + ($^O eq 'VMS' ? 'show default' : + (-e '/bin/pwd' ? '/bin/pwd' : 'pwd')))); + my $tmpdir = tempdir(); + if ( $^O eq 'qnx' ) { + $tmpdir = `/usr/bin/fullpath -t $tmpdir`; + chomp $tmpdir; + } + $tmpdir = '/sys$scratch' if $^O eq 'VMS'; + $tmpdir =~ tr|\\|/|; + + my $savedir = `$pwd`; # we don't use Cwd.pm because we want to check + # that it get require'd correctly by URL.pm + chomp $savedir; + if ($^O eq 'VMS') { + $savedir =~ s#^\s+##; + $savedir = VMS::Filespec::unixpath($savedir); + $savedir =~ s#/$##; + } + + # cwd + chdir($tmpdir) or die $!; + my $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + if ($^O eq 'VMS') { + $dir =~ s#^\s+##; + $dir = VMS::Filespec::unixpath($dir); + $dir =~ s#/$##; + } + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL; + my $ss = $isMSWin32 ? '//' : (($dir =~ m,^/,) ? '' : '///' ); + is($url->as_string, URI::URL->new("file:$ss$dir/")->as_string, ref($url) . '->as_string'); + + note "Local directory is ". $url->local_path . "\n"; + + if ($^O ne 'VMS') { + # absolute dir + chdir('/') or die $!; + $url = newlocal URI::URL '/usr/'; + is($url->as_string, 'file:/usr/', ref($url) . '->as_string'); + + # absolute file + $url = newlocal URI::URL '/vmunix'; + is($url->as_string, 'file:/vmunix', ref($url) . '->as_string'); + } + + # relative file + chdir($tmpdir) or fail $!; + $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + if ($^O eq 'VMS') { + $dir =~ s#^\s+##; + $dir = VMS::Filespec::unixpath($dir); + $dir =~ s#/$##; + } + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL 'foo'; + is($url->as_string, "file:$ss$dir/foo", ref($url) . '->as_string'); + + # relative dir + chdir($tmpdir) or fail $!; + $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + if ($^O eq 'VMS') { + $dir =~ s#^\s+##; + $dir = VMS::Filespec::unixpath($dir); + $dir =~ s#/$##; + } + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL 'bar/'; + is($url->as_string, "file:$ss$dir/bar/", ref($url) . '->as_string'); + + # 0 + if ($^O ne 'VMS') { + chdir('/') or fail $!; + $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL '0'; + is($url->as_string, "file:$ss${dir}0", ref($url) . '->as_string'); + } + + # Test access methods for file URLs + $url = new URI::URL 'file:/c:/dos'; + is($url->dos_path, 'C:\\DOS', ref($url) . '->as_string'); + is($url->unix_path, '/c:/dos', ref($url) . '->as_string'); + #is($url->vms_path, '[C:]DOS', ref($url) . '->as_string'); + is($url->mac_path, undef, ref($url) . '->as_string'); + + $url = new URI::URL 'file:/foo/bar'; + is($url->unix_path, '/foo/bar', ref($url) . '->as_string'); + is($url->mac_path, 'foo:bar', ref($url) . '->as_string'); + + # Some edge cases +# $url = new URI::URL 'file:'; +# is($url->unix_path, '/', ref($url) . '->as_string'); + $url = new URI::URL 'file:/'; + is($url->unix_path, '/', ref($url) . '->as_string'); + $url = new URI::URL 'file:.'; + is($url->unix_path, '.', ref($url) . '->as_string'); + $url = new URI::URL 'file:./foo'; + is($url->unix_path, './foo', ref($url) . '->as_string'); + $url = new URI::URL 'file:0'; + is($url->unix_path, '0', ref($url) . '->as_string'); + $url = new URI::URL 'file:../../foo'; + is($url->unix_path, '../../foo', ref($url) . '->as_string'); + $url = new URI::URL 'file:foo/../bar'; + is($url->unix_path, 'foo/../bar', ref($url) . '->as_string'); + + # Relative files + $url = new URI::URL 'file:foo/b%61r/Note.txt'; + is($url->unix_path, 'foo/bar/Note.txt', ref($url) . '->as_string'); + is($url->mac_path, ':foo:bar:Note.txt', ref($url) . '->as_string'); + is($url->dos_path, 'FOO\\BAR\\NOTE.TXT', ref($url) . '->as_string'); + #is($url->vms_path', '[.FOO.BAR]NOTE.TXT', ref($url) . '->as_string'); + + # The VMS path found in RFC 1738 (section 3.10) + $url = new URI::URL 'file://vms.host.edu/disk$user/my/notes/note12345.txt'; +# is($url->vms_path, 'DISK$USER:[MY.NOTES]NOTE12345.TXT', ref($url) . '->as_string'); +# is($url->mac_path, 'disk$user:my:notes:note12345.txt', ref($url) . '->as_string'); + + chdir($savedir) or fail $!; +} + + +##################################################################### +# +# absolute_test() +# +sub absolute_test { + # Tests from draft-ietf-uri-relative-url-06.txt + # Copied verbatim from the draft, parsed below + + @URI::URL::g::ISA = qw(URI::URL::_generic); # for these tests + + my $base = 'http://a/b/c/d;p?q#f'; + + my $absolute_tests = <<EOM; +5.1. Normal Examples + + g:h = <URL:g:h> + g = <URL:http://a/b/c/g> + ./g = <URL:http://a/b/c/g> + g/ = <URL:http://a/b/c/g/> + /g = <URL:http://a/g> + //g = <URL:http://g> +# ?y = <URL:http://a/b/c/d;p?y> + g?y = <URL:http://a/b/c/g?y> + g?y/./x = <URL:http://a/b/c/g?y/./x> + #s = <URL:http://a/b/c/d;p?q#s> + g#s = <URL:http://a/b/c/g#s> + g#s/./x = <URL:http://a/b/c/g#s/./x> + g?y#s = <URL:http://a/b/c/g?y#s> + # ;x = <URL:http://a/b/c/d;x> + g;x = <URL:http://a/b/c/g;x> + g;x?y#s = <URL:http://a/b/c/g;x?y#s> + . = <URL:http://a/b/c/> + ./ = <URL:http://a/b/c/> + .. = <URL:http://a/b/> + ../ = <URL:http://a/b/> + ../g = <URL:http://a/b/g> + ../.. = <URL:http://a/> + ../../ = <URL:http://a/> + ../../g = <URL:http://a/g> + +5.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur + in normal practice, all URL parsers should be capable of resolving + them consistently. Each example uses the same base as above. + + An empty reference resolves to the complete base URL: + + <> = <URL:http://a/b/c/d;p?q#f> + + Parsers must be careful in handling the case where there are more + relative path ".." segments than there are hierarchical levels in + the base URL's path. Note that the ".." syntax cannot be used to + change the <net_loc> of a URL. + + ../../../g = <URL:http://a/../g> + ../../../../g = <URL:http://a/../../g> + + Similarly, parsers must avoid treating "." and ".." as special + when they are not complete components of a relative path. + + /./g = <URL:http://a/./g> + /../g = <URL:http://a/../g> + g. = <URL:http://a/b/c/g.> + .g = <URL:http://a/b/c/.g> + g.. = <URL:http://a/b/c/g..> + ..g = <URL:http://a/b/c/..g> + + Less likely are cases where the relative URL uses unnecessary or + nonsensical forms of the "." and ".." complete path segments. + + ./../g = <URL:http://a/b/g> + ./g/. = <URL:http://a/b/c/g/> + g/./h = <URL:http://a/b/c/g/h> + g/../h = <URL:http://a/b/c/h> + + Finally, some older parsers allow the scheme name to be present in + a relative URL if it is the same as the base URL scheme. This is + considered to be a loophole in prior specifications of partial + URLs [1] and should be avoided by future parsers. + + http:g = <URL:http:g> + http: = <URL:http:> +EOM + # convert text to list like + # @absolute_tests = ( ['g:h' => 'g:h'], ...) + + my @absolute_tests; + for my $line (split("\n", $absolute_tests)) { + next unless $line =~ /^\s{6}/; + if ($line =~ /^\s+(\S+)\s*=\s*<URL:([^>]*)>/) { + my($rel, $abs) = ($1, $2); + $rel = '' if $rel eq '<>'; + push(@absolute_tests, [$rel, $abs]); + } + else { + warn "illegal line '$line'"; + } + } + + # add some extra ones for good measure + + push(@absolute_tests, ['x/y//../z' => 'http://a/b/c/x/y/z'], + ['1' => 'http://a/b/c/1' ], + ['0' => 'http://a/b/c/0' ], + ['/0' => 'http://a/0' ], +# ['%2e/a' => 'http://a/b/c/%2e/a'], # %2e is '.' +# ['%2e%2e/a' => 'http://a/b/c/%2e%2e/a'], + ); + + note " Relative + Base => Expected Absolute URL"; + note "================================================\n"; + for my $test (@absolute_tests) { + my($rel, $abs) = @$test; + my $abs_url = new URI::URL $abs; + my $abs_str = $abs_url->as_string; + + note sprintf(" %-10s + $base => %s", $rel, $abs); + my $u = new URI::URL $rel, $base; + my $got = $u->abs; + is($got->as_string, $abs_str, ref($url) . '->as_string'); + } + + # bug found and fixed in 1.9 by "J.E. Fritz" <FRITZ@gems.vcu.edu> + $base = new URI::URL 'http://host/directory/file'; + my $relative = new URI::URL 'file', $base; + my $result = $relative->abs; + + my ($a, $b) = ($base->path, $result->path); + is($a, $b, 'identity'); + + # Counter the expectation of least surprise, + # section 6 of the draft says the URL should + # be canonicalised, rather than making a simple + # substitution of the last component. + # Better doublecheck someone hasn't "fixed this bug" :-) + $base = new URI::URL 'http://host/dir1/../dir2/file'; + $relative = new URI::URL 'file', $base; + $result = $relative->abs; + is($result, 'http://host/dir2/file', 'URL canonicalised'); + + note "--------"; + # Test various other kinds of URLs and how they like to be absolutized + for (["http://abc/", "news:45664545", "http://abc/"], + ["news:abc", "http://abc/", "news:abc"], + ["abc", "file:/test?aas", "file:/abc"], +# ["gopher:", "", "gopher:"], +# ["?foo", "http://abc/a", "http://abc/a?foo"], + ["?foo", "file:/abc", "file:/abc?foo"], + ["#foo", "http://abc/a", "http://abc/a#foo"], + ["#foo", "file:a", "file:a#foo"], + ["#foo", "file:/a", "file:/a#foo"], + ["#foo", "file:/a", "file:/a#foo"], + ["#foo", "file://localhost/a", "file://localhost/a#foo"], + ['123@sn.no', "news:comp.lang.perl.misc", 'news:/123@sn.no'], + ['no.perl', 'news:123@sn.no', 'news:/no.perl'], + ['mailto:aas@a.sn.no', "http://www.sn.no/", 'mailto:aas@a.sn.no'], + + # Test absolutizing with old behaviour. + ['http:foo', 'http://h/a/b', 'http://h/a/foo'], + ['http:/foo', 'http://h/a/b', 'http://h/foo'], + ['http:?foo', 'http://h/a/b', 'http://h/a/b?foo'], + ['http:#foo', 'http://h/a/b', 'http://h/a/b#foo'], + ['http:?foo#bar','http://h/a/b', 'http://h/a/b?foo#bar'], + ['file:/foo', 'http://h/a/b', 'file:/foo'], + + ) + { + my($url, $base, $expected_abs) = @$_; + my $rel = new URI::URL $url, $base; + my $abs = $rel->abs($base, 1); + note sprintf(" %-12s+ $base => %s", $rel, $abs); + is($abs->as_string, $expected_abs, ref($url) . '->as_string'); + } + note "absolute test ok\n"; + + # Test relative function + for ( + ["http://abc/a", "http://abc", "a"], + ["http://abc/a", "http://abc/b", "a"], + ["http://abc/a?q", "http://abc/b", "a?q"], + ["http://abc/a;p", "http://abc/b", "a;p"], + ["http://abc/a", "http://abc/a/b/c/", "../../../a"], + ["http://abc/a/", "http://abc/a/", "./"], + ["http://abc/a#f", "http://abc/a", "#f"], + + ["file:/etc/motd", "file:/", "etc/motd"], + ["file:/etc/motd", "file:/etc/passwd", "motd"], + ["file:/etc/motd", "file:/etc/rc2.d/", "../motd"], + ["file:/etc/motd", "file:/usr/lib/doc", "../../etc/motd"], + ["file:", "file:/etc/", "../"], + ["file:foo", "file:/etc/", "../foo"], + + ["mailto:aas", "http://abc", "mailto:aas"], + + # Nicolai Langfeldt's original example + ["http://www.math.uio.no/doc/mail/top.html", + "http://www.math.uio.no/doc/linux/", "../mail/top.html"], + ) + { + my($abs, $base, $expect) = @$_; + my $rel = URI::URL->new($abs, $base)->rel; + is($rel->as_string, $expect, "url('$abs', '$base')->rel = '$expect'"); + } + note "relative test ok\n"; +} + + +sub eq_test +{ + my $u1 = new URI::URL 'http://abc.com:80/~smith/home.html'; + my $u2 = new URI::URL 'http://ABC.com/%7Esmith/home.html'; + my $u3 = new URI::URL 'http://ABC.com:/%7esmith/home.html'; + + # Test all permutations of these tree + ok($u1->eq($u2), "1: $u1 ne $u2"); + ok($u1->eq($u3), "2: $u1 ne $u3"); + ok($u2->eq($u1), "3: $u2 ne $u1"); + ok($u2->eq($u3), "4: $u2 ne $u3"); + ok($u3->eq($u1), "5: $u3 ne $u1"); + ok($u3->eq($u2), "6: $u3 ne $u2"); + + # Test empty path + my $u4 = new URI::URL 'http://www.sn.no'; + ok($u4->eq("HTTP://WWW.SN.NO:80/"), "7: $u4"); + ok(!$u4->eq("http://www.sn.no:81"),"8: $u4"); + + # Test mailto +# my $u5 = new URI::URL 'mailto:AAS@SN.no'; +# ok($u5->eq('mailto:aas@sn.no'), "9: $u5"); + + + # Test reserved char + my $u6 = new URI::URL 'ftp://ftp/%2Fetc'; + ok($u6->eq("ftp://ftp/%2fetc"), "10: $u6"); + ok(!$u6->eq("ftp://ftp://etc"), "11: $u6"); +} diff --git a/t/old-file.t b/t/old-file.t new file mode 100644 index 0000000..e1ab8f5 --- /dev/null +++ b/t/old-file.t @@ -0,0 +1,81 @@ +use strict; +use warnings; + +use URI::file; +$URI::file::DEFAULT_AUTHORITY = undef; + +my @tests = ( +[ "file", "unix", "win32", "mac" ], +#---------------- ------------ --------------- -------------- +[ "file://localhost/foo/bar", + "!/foo/bar", "!\\foo\\bar", "!foo:bar", ], +[ "file:///foo/bar", + "!/foo/bar", "!\\foo\\bar", "!foo:bar", ], +[ "file:/foo/bar", "/foo/bar", "\\foo\\bar", "foo:bar", ], +[ "foo/bar", "foo/bar", "foo\\bar", ":foo:bar",], +[ "file://foo3445x/bar","!//foo3445x/bar", "\\\\foo3445x\\bar", "!foo3445x:bar"], +[ "file://a:/", "!//a:/", "!A:\\", undef], +[ "file:/", "/", "\\", undef], +[ "file://A:relative/", "!//A:relative/", "A:", undef], +[ ".", ".", ".", ":"], +[ "..", "..", "..", "::"], +[ "%2E", "!.", "!.", ":."], +[ "../%2E%2E", "!../..", "!..\\..", "::.."], +); +if ($^O eq "MacOS") { +my @extratests = ( +[ "../..", "../..", "..\\..", ":::"], +[ "../../", "../../", "..\\..\\", "!:::"], +[ "file:./foo.bar", "!./foo.bar", "!.\\foo.bar", "!:foo.bar"], +[ "file:/%2Ffoo/bar", undef, undef, "/foo:bar"], +[ "file:/.%2Ffoo/bar", undef, undef, "./foo:bar"], +[ "file:/fee/.%2Ffoo%2Fbar", undef, undef, "fee:./foo/bar"], +[ "file:/.%2Ffoo%2Fbar/", undef, undef, "./foo/bar:"], +[ "file:/.%2Ffoo%2Fbar", undef, undef, "!./foo/bar:"], +[ "file:/%2E%2E/foo", "!/../foo", "!\\..\\foo" , "..:foo"], +[ "file:/bar/%2E/foo", "!/bar/./foo", "!\\bar\\.\\foo", "bar:.:foo"], +[ "file:/foo/../bar", "/foo/../bar", "\\foo\\..\\bar", "foo::bar"], +[ "file:/a/b/../../c/d", "/a/b/../../c/d", "\\a\\b\\..\\..\\c\\d", "a:b:::c:d"], +); + push(@tests,@extratests); +} + +my @os = @{shift @tests}; +shift @os; # file + +my $num = @tests; +print "1..$num\n"; + +my $testno = 1; + +for my $t (@tests) { + my @t = @$t; + my $file = shift @t; + my $err; + + my $u = URI->new($file, "file"); + my $i = 0; + for my $os (@os) { + my $f = $u->file($os); + my $expect = $t[$i]; + $f = "<undef>" unless defined $f; + $expect = "<undef>" unless defined $expect; + my $loose; + $loose++ if $expect =~ s/^!//; + if ($expect ne $f) { + print "URI->new('$file', 'file')->file('$os') ne $expect, but $f\n"; + $err++; + } + if (defined($t[$i]) && !$loose) { + my $u2 = URI::file->new($t[$i], $os); + unless ($u2->as_string eq $file) { + print "URI::file->new('$t[$i]', '$os') ne $file, but $u2\n"; + $err++; + } + } + $i++; + } + print "not " if $err; + print "ok $testno\n"; + $testno++; +} diff --git a/t/old-relbase.t b/t/old-relbase.t new file mode 100644 index 0000000..3bd0ae8 --- /dev/null +++ b/t/old-relbase.t @@ -0,0 +1,37 @@ +use strict; +use warnings; + +print "1..5\n"; + +use URI::URL; + +# We used to have problems with URLs that used a base that was +# not absolute itself. + +my $u1 = url("/foo/bar", "http://www.acme.com/"); +my $u2 = url("../foo/", $u1); +my $u3 = url("zoo/foo", $u2); + +my $a1 = $u1->abs->as_string; +my $a2 = $u2->abs->as_string; +my $a3 = $u3->abs->as_string; + +print "$a1\n$a2\n$a3\n"; + +print "not " unless $a1 eq "http://www.acme.com/foo/bar"; +print "ok 1\n"; +print "not " unless $a2 eq "http://www.acme.com/foo/"; +print "ok 2\n"; +print "not " unless $a3 eq "http://www.acme.com/foo/zoo/foo"; +print "ok 3\n"; + +# We used to have problems with URI::URL as the base class :-( +my $u4 = url("foo", "URI::URL"); +my $a4 = $u4->abs; +print "$a4\n"; +print "not " unless $u4 eq "foo" && $a4 eq "uri:/foo"; +print "ok 4\n"; + +# Test new_abs for URI::URL objects +print "not " unless URI::URL->new_abs("foo", "http://foo/bar") eq "http://foo/foo"; +print "ok 5\n"; diff --git a/t/path-segments.t b/t/path-segments.t new file mode 100755 index 0000000..ea9b4fa --- /dev/null +++ b/t/path-segments.t @@ -0,0 +1,33 @@ +use strict; +use warnings; + +use Test::More 'no_plan'; + +use URI (); + +{ + my $u = URI->new("http://www.example.org/a/b/c"); + + is_deeply [$u->path_segments], ['', qw(a b c)], 'path_segments in list context'; + is $u->path_segments, '/a/b/c', 'path_segments in scalar context'; + + is_deeply [$u->path_segments('', qw(z y x))], ['', qw(a b c)], 'set path_segments in list context'; + is $u->path_segments('/i/j/k'), '/z/y/x', 'set path_segments in scalar context'; + + $u->path_segments('', qw(q r s)); + is $u->path_segments, '/q/r/s', 'set path_segments in void context'; +} + +{ + my $u = URI->new("http://www.example.org/abc"); + $u->path_segments('', '%', ';', '/'); + is $u->path_segments, '/%25/%3B/%2F', 'escaping special characters'; +} + +{ + my $u = URI->new("http://www.example.org/abc;param1;param2"); + my @ps = $u->path_segments; + isa_ok $ps[1], 'URI::_segment'; + $u->path_segments(@ps); + is $u->path_segments, '/abc;param1;param2', 'dealing with URI segments'; +} @@ -0,0 +1,50 @@ +use strict; +use warnings; + +print "1..8\n"; + +use URI; + +my $u = URI->new('pop://aas@pop.sn.no'); + +print "not " unless $u->user eq "aas" && + !defined($u->auth) && + $u->host eq "pop.sn.no" && + $u->port == 110 && + $u eq 'pop://aas@pop.sn.no'; +print "ok 1\n"; + +$u->auth("+APOP"); +print "not " unless $u->auth eq "+APOP" && + $u eq 'pop://aas;AUTH=+APOP@pop.sn.no'; +print "ok 2\n"; + +$u->user("gisle"); +print "not " unless $u->user eq "gisle" && + $u eq 'pop://gisle;AUTH=+APOP@pop.sn.no'; +print "ok 3\n"; + +$u->port(4000); +print "not " unless $u eq 'pop://gisle;AUTH=+APOP@pop.sn.no:4000'; +print "ok 4\n"; + +$u = URI->new("pop:"); +$u->host("pop.sn.no"); +$u->user("aas"); +$u->auth("*"); +print "not " unless $u eq 'pop://aas;AUTH=*@pop.sn.no'; +print "ok 5\n"; + +$u->auth(undef); +print "not " unless $u eq 'pop://aas@pop.sn.no'; +print "ok 6\n"; + +$u->user(undef); +print "not " unless $u eq 'pop://pop.sn.no'; +print "ok 7\n"; + +# Try some funny characters too +$u->user('får;k@l'); +print "not " unless $u->user eq 'får;k@l' && + $u eq 'pop://f%E5r%3Bk%40l@pop.sn.no'; +print "ok 8\n"; diff --git a/t/punycode.t b/t/punycode.t new file mode 100644 index 0000000..d1e3084 --- /dev/null +++ b/t/punycode.t @@ -0,0 +1,56 @@ +use strict; +use warnings; + +use utf8; +use Test::More tests => 15; +use URI::_punycode qw(encode_punycode decode_punycode); + +my %RFC_3492 = ( + A => { + unicode => udecode("u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644 u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F"), + ascii => "egbpdaj6bu4bxfgehfvwxn", + }, + B => { + unicode => udecode("u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587"), + ascii => "ihqwcrb4cv8a8dqg056pqjye", + }, + E => { + unicode => udecode("u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8 u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2 u+05D1 u+05E8 u+05D9 u+05EA"), + ascii => "4dbcagdahymbxekheh6e0a7fei0b", + }, + J => { + unicode => udecode("U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070 u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070 u+006C u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061 u+0062 u+006C u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070 u+0061 u+00F1 u+006F u+006C"), + ascii => "PorqunopuedensimplementehablarenEspaol-fmd56a", + }, + K => { + unicode => udecode("U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068 u+1EC9 u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067 U+0056 u+0069 u+1EC7 u+0074"), + ascii => "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", + }, + O => { + unicode => udecode("u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032"), + ascii => "2-u9tlzr9756bt3uc0v", + }, + S => { + unicode => "\$1.00", + ascii => "\$1.00", + }, +); + +is encode_punycode("bücher"), "bcher-kva", "http://en.wikipedia.org/wiki/Punycode example encode"; +is decode_punycode("bcher-kva"), "bücher", "http://en.wikipedia.org/wiki/Punycode example decode"; + +for my $test_key (sort keys %RFC_3492) { + my $test = $RFC_3492{$test_key}; + is encode_punycode($test->{unicode}), $test->{ascii}, "$test_key encode"; + is decode_punycode($test->{ascii}), $test->{unicode}, "$test_key decode" unless $test_key eq "S"; +} + +sub udecode { + my $str = shift; + my @u; + for (split(" ", $str)) { + /^[uU]\+[\dA-F]{2,4}$/ || die "Unexpected ucode: $_"; + push(@u, chr(hex(substr($_, 2)))); + } + return join("", @u); +} diff --git a/t/query-param.t b/t/query-param.t new file mode 100644 index 0000000..fc852c0 --- /dev/null +++ b/t/query-param.t @@ -0,0 +1,71 @@ +use strict; +use warnings; + +use Test::More tests => 19; + +use URI; +use URI::QueryParam; + +my $u = URI->new("http://www.sol.no?foo=4&bar=5&foo=5"); + +is_deeply( + $u->query_form_hash, + { foo => [ 4, 5 ], bar => 5 }, + 'query_form_hash get' +); + +$u->query_form_hash({ a => 1, b => 2}); +ok $u->query eq "a=1&b=2" || $u->query eq "b=2&a=1", 'query_form_hash set'; + +$u->query("a=1&b=2&a=3&b=4&a=5"); +is join(':', $u->query_param), "a:b", 'query_param list keys'; + +is $u->query_param("a"), "1", "query_param scalar return"; +is join(":", $u->query_param("a")), "1:3:5", "query_param list return"; + +is $u->query_param(a => 11 .. 15), 1, "query_param set return"; + +is $u->query, "a=11&b=2&a=12&b=4&a=13&a=14&a=15", "param order"; + +is join(":", $u->query_param(a => 11)), "11:12:13:14:15", "old values returned"; + +is $u->query, "a=11&b=2&b=4"; + +is $u->query_param_delete("a"), "11", 'query_param_delete'; + +is $u->query, "b=2&b=4"; + +$u->query_param_append(a => 1, 3, 5); +$u->query_param_append(b => 6); + +is $u->query, "b=2&b=4&a=1&a=3&a=5&b=6"; + +$u->query_param(a => []); # same as $u->query_param_delete("a"); + +is $u->query, "b=2&b=4&b=6", 'delete by assigning empty list'; + +$u->query(undef); +$u->query_param(a => 1, 2, 3); +$u->query_param(b => 1); + +is $u->query, 'a=1&a=2&a=3&b=1', 'query_param from scratch'; + +$u->query_param_delete('a'); +$u->query_param_delete('b'); + +ok ! $u->query; + +is $u->as_string, 'http://www.sol.no'; + +$u->query(undef); +$u->query_param(a => 1, 2, 3); +$u->query_param(b => 1); + +is $u->query, 'a=1&a=2&a=3&b=1'; + +$u->query_param('a' => []); +$u->query_param('b' => []); + +ok ! $u->query; + +is $u->as_string, 'http://www.sol.no'; diff --git a/t/query.t b/t/query.t new file mode 100644 index 0000000..2970814 --- /dev/null +++ b/t/query.t @@ -0,0 +1,81 @@ +use strict; +use warnings; + +use Test::More tests => 23; + +use URI (); +my $u = URI->new("", "http"); +my @q; + +$u->query_form(a => 3, b => 4); +is $u, "?a=3&b=4"; + +$u->query_form(a => undef); +is $u, "?a="; + +$u->query_form("a[=&+#] " => " [=&+#]"); +is $u, "?a%5B%3D%26%2B%23%5D+=+%5B%3D%26%2B%23%5D"; + +@q = $u->query_form; +is join(":", @q), "a[=&+#] : [=&+#]"; + +@q = $u->query_keywords; +ok !@q; + +$u->query_keywords("a", "b"); +is $u, "?a+b"; + +$u->query_keywords(" ", "+", "=", "[", "]"); +is $u, "?%20+%2B+%3D+%5B+%5D"; + +@q = $u->query_keywords; +is join(":", @q), " :+:=:[:]"; + +@q = $u->query_form; +ok !@q; + +$u->query(" +?=#"); +is $u, "?%20+?=%23"; + +$u->query_keywords([qw(a b)]); +is $u, "?a+b"; + +$u->query_keywords([]); +is $u, ""; + +$u->query_form({ a => 1, b => 2 }); +ok $u eq "?a=1&b=2" || $u eq "?b=2&a=1"; + +$u->query_form([ a => 1, b => 2 ]); +is $u, "?a=1&b=2"; + +$u->query_form({}); +is $u, ""; + +$u->query_form([a => [1..4]]); +is $u, "?a=1&a=2&a=3&a=4"; + +$u->query_form([]); +is $u, ""; + +$u->query_form(a => { foo => 1 }); +ok "$u" =~ /^\?a=HASH\(/; + +$u->query_form(a => 1, b => 2, ';'); +is $u, "?a=1;b=2"; + +$u->query_form(a => 1, c => 2); +is $u, "?a=1;c=2"; + +$u->query_form(a => 1, c => 2, '&'); +is $u, "?a=1&c=2"; + +$u->query_form([a => 1, b => 2], ';'); +is $u, "?a=1;b=2"; + +$u->query_form([]); +{ + local $URI::DEFAULT_QUERY_FORM_DELIMITER = ';'; + $u->query_form(a => 1, b => 2); +} +is $u, "?a=1;b=2"; @@ -0,0 +1,21 @@ +use strict; +use warnings; + +use Test::More; + +plan tests => 6; + +use URI; + +my $uri; + +$uri = URI->new("http://www.example.com/foo/bar/"); +is($uri->rel("http://www.example.com/foo/bar/"), "./"); +is($uri->rel("HTTP://WWW.EXAMPLE.COM/foo/bar/"), "./"); +is($uri->rel("HTTP://WWW.EXAMPLE.COM/FOO/BAR/"), "../../foo/bar/"); +is($uri->rel("HTTP://WWW.EXAMPLE.COM:80/foo/bar/"), "./"); + +$uri = URI->new("http://www.example.com/foo/bar"); +is($uri->rel("http://www.example.com/foo/bar"), "bar"); +is($uri->rel("http://www.example.com/foo"), "foo/bar"); + diff --git a/t/rfc2732.t b/t/rfc2732.t new file mode 100644 index 0000000..d69960a --- /dev/null +++ b/t/rfc2732.t @@ -0,0 +1,59 @@ +# Test URIs containing IPv6 addresses + +use strict; +use warnings; + +use Test::More tests => 19; + +use URI; +my $uri = URI->new("http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"); + +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"; +is $uri->host, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210"; +is $uri->host_port, "[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80"; +is $uri->port, "80"; + +$uri->port(undef); +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]/index.html"; +is $uri->host_port, "[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80"; +$uri->port(80); + +$uri->host("host"); +is $uri->as_string, "http://host:80/index.html"; + +$uri->host("FEDC:BA98:7654:3210:FEDC:BA98:7654:3210"); +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"; +$uri->host_port("[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:88"); +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:88/index.html"; +$uri->host_port("[::1]:80"); +is $uri->as_string, "http://[::1]:80/index.html"; +$uri->host("::1:80"); +is $uri->as_string, "http://[::1:80]:80/index.html"; +$uri->host("[::1:80]"); +is $uri->as_string, "http://[::1:80]:80/index.html"; +$uri->host("[::1]:88"); +is $uri->as_string, "http://[::1]:88/index.html"; + + +$uri = URI->new("ftp://ftp:@[3ffe:2a00:100:7031::1]"); +is $uri->as_string, "ftp://ftp:@[3ffe:2a00:100:7031::1]"; + +is $uri->port, "21"; +ok !$uri->_port; + +is $uri->host("ftp"), "3ffe:2a00:100:7031::1"; + +is $uri, "ftp://ftp:\@ftp"; + +$uri = URI->new("http://[::1]"); +is $uri->host, "::1"; + +__END__ + + http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html + http://[1080:0:0:0:8:800:200C:417A]/index.html + http://[3ffe:2a00:100:7031::1] + http://[1080::8:800:200C:417A]/foo + http://[::192.9.5.5]/ipng + http://[::FFFF:129.144.52.38]:80/index.html + http://[2010:836B:4179::836B:4179] diff --git a/t/roy-test.t b/t/roy-test.t new file mode 100644 index 0000000..a7a9fdc --- /dev/null +++ b/t/roy-test.t @@ -0,0 +1,44 @@ +use strict; +use warnings; + +use Test qw(plan ok); +plan tests => 102; + +use URI; +use File::Spec::Functions qw(catfile); + +my $no = 1; + +my @prefix; +push(@prefix, "t") if -d "t"; + +for my $i (1..5) { + my $file = catfile(@prefix, "roytest$i.html"); + + open(FILE, $file) || die "Can't open $file: $!"; + print "# $file\n"; + my $base = undef; + while (<FILE>) { + if (/^<BASE href="([^"]+)">/) { + $base = URI->new($1); + } elsif (/^<a href="([^"]*)">.*<\/a>\s*=\s*(\S+)/) { + die "Missing base at line $." unless $base; + my $link = $1; + my $exp = $2; + $exp = $base if $exp =~ /current/; # special case test 22 + + # rfc2396bis restores the rfc1808 behaviour + if ($no == 7) { + $exp = "http://a/b/c/d;p?y"; + } + elsif ($no == 48) { + $exp = "http://a/b/c/d;p?y"; + } + + ok(URI->new($link)->abs($base), $exp); + + $no++; + } + } + close(FILE); +} diff --git a/t/roytest1.html b/t/roytest1.html new file mode 100644 index 0000000..95fedbe --- /dev/null +++ b/t/roytest1.html @@ -0,0 +1,194 @@ +<HTML><HEAD> +<TITLE>Examples of Resolving Relative URLs</TITLE> +<BASE href="http://a/b/c/d;p?q"> +</HEAD><BODY> +<H1>Examples of Resolving Relative URLs</H1> + +This document has an embedded base URL of +<PRE> + Content-Base: http://a/b/c/d;p?q +</PRE> +the relative URLs should be resolved as shown below. +<P> +I will need your help testing the examples on multiple browsers. +What you need to do is point to the example anchor and compare it to the +resolved URL in your browser (most browsers have a feature by which you +can see the resolved URL at the bottom of the window/screen when the anchor +is active). + +<H2>Tested Clients and Client Libraries</H2> + +<DL COMPACT> +<DT>[R] +<DD>RFC 2396 (the right way to parse) +<DT>[X] +<DD>RFC 1808 +<DT>[1] +<DD>Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +<DT>[2] +<DD>Lynx/2.7.1 libwww-FM/2.14 +<DT>[3] +<DD>MSIE 3.01; Windows 95 +<DT>[4] +<DD>NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) libwww/2.12 +<DT>[5] +<DD>libwww-perl/5.14 [Martijn Koster] +</DL> + +<H2>Normal Examples</H2> +<PRE> + RESULTS from + +<a href="g:h">g:h</a> = g:h [R,X,2,3,4,5] + http://a/b/c/g:h [1] + +<a href="g">g</a> = http://a/b/c/g [R,X,1,2,3,4,5] + +<a href="./g">./g</a> = http://a/b/c/g [R,X,1,2,3,4,5] + +<a href="g/">g/</a> = http://a/b/c/g/ [R,X,1,2,3,4,5] + +<a href="/g">/g</a> = http://a/g [R,X,1,2,3,4,5] + +<a href="//g">//g</a> = http://g [R,X,1,2,3,4,5] + +<a href="?y">?y</a> = http://a/b/c/?y [R,1,2,3,4] + http://a/b/c/d;p?y [X,5] + +<a href="g?y">g?y</a> = http://a/b/c/g?y [R,X,1,2,3,4,5] + +<a name="s" href="#s">#s</a> = (current document)#s [R,2,4] + http://a/b/c/d;p?q#s [X,1,3,5] + +<a href="g#s">g#s</a> = http://a/b/c/g#s [R,X,1,2,3,4,5] + +<a href="g?y#s">g?y#s</a> = http://a/b/c/g?y#s [R,X,1,2,3,4,5] + +<a href=";x">;x</a> = http://a/b/c/;x [R,1,2,3,4] + http://a/b/c/d;x [X,5] + +<a href="g;x">g;x</a> = http://a/b/c/g;x [R,X,1,2,3,4,5] + +<a href="g;x?y#s">g;x?y#s</a> = http://a/b/c/g;x?y#s [R,X,1,2,3,4,5] + +<a href=".">.</a> = http://a/b/c/ [R,X,2,5] + http://a/b/c/. [1] + http://a/b/c [3,4] + +<a href="./">./</a> = http://a/b/c/ [R,X,1,2,3,4,5] + +<a href="..">..</a> = http://a/b/ [R,X,2,5] + http://a/b [1,3,4] + +<a href="../">../</a> = http://a/b/ [R,X,1,2,3,4,5] + +<a href="../g">../g</a> = http://a/b/g [R,X,1,2,3,4,5] + +<a href="../..">../..</a> = http://a/ [R,X,2,5] + http://a [1,3,4] + +<a href="../../">../../</a> = http://a/ [R,X,1,2,3,4,5] + +<a href="../../g">../../g</a> = http://a/g [R,X,1,2,3,4,5] +</PRE> + +<H2>Abnormal Examples</H2> + +Although the following abnormal examples are unlikely to occur in +normal practice, all URL parsers should be capable of resolving them +consistently. Each example uses the same base as above.<P> + +An empty reference refers to the start of the current document. +<PRE> +<a href=""><></a> = (current document) [R,2,4] + http://a/b/c/d;p?q [X,3,5] + http://a/b/c/ [1] +</PRE> +Parsers must be careful in handling the case where there are more +relative path ".." segments than there are hierarchical levels in the +base URL's path. Note that the ".." syntax cannot be used to change +the site component of a URL. +<PRE> +<a href="../../../g">../../../g</a> = http://a/../g [R,X,2,4,5] + http://a/g [R,1,3] + +<a href="../../../../g">../../../../g</a> = http://a/../../g [R,X,2,4,5] + http://a/g [R,1,3] +</PRE> +In practice, some implementations strip leading relative symbolic +elements (".", "..") after applying a relative URL calculation, based +on the theory that compensating for obvious author errors is better +than allowing the request to fail. Thus, the above two references +will be interpreted as "http://a/g" by some implementations. +<P> +Similarly, parsers must avoid treating "." and ".." as special when +they are not complete components of a relative path. +<PRE> +<a href="/./g">/./g</a> = http://a/./g [R,X,2,3,4,5] + http://a/g [1] + +<a href="/../g">/../g</a> = http://a/../g [R,X,2,3,4,5] + http://a/g [1] + +<a href="g.">g.</a> = http://a/b/c/g. [R,X,1,2,3,4,5] + +<a href=".g">.g</a> = http://a/b/c/.g [R,X,1,2,3,4,5] + +<a href="g..">g..</a> = http://a/b/c/g.. [R,X,1,2,3,4,5] + +<a href="..g">..g</a> = http://a/b/c/..g [R,X,1,2,3,4,5] +</PRE> +Less likely are cases where the relative URL uses unnecessary or +nonsensical forms of the "." and ".." complete path segments. +<PRE> +<a href="./../g">./../g</a> = http://a/b/g [R,X,1,2,5] + http://a/b/c/../g [3,4] + +<a href="./g/.">./g/.</a> = http://a/b/c/g/ [R,X,2,5] + http://a/b/c/g/. [1] + http://a/b/c/g [3,4] + +<a href="g/./h">g/./h</a> = http://a/b/c/g/h [R,X,1,2,3,4,5] + +<a href="g/../h">g/../h</a> = http://a/b/c/h [R,X,1,2,3,4,5] + +<a href="g;x=1/./y">g;x=1/./y</a> = http://a/b/c/g;x=1/y [R,1,2,3,4] + http://a/b/c/g;x=1/./y [X,5] + +<a href="g;x=1/../y">g;x=1/../y</a> = http://a/b/c/y [R,1,2,3,4] + http://a/b/c/g;x=1/../y [X,5] + +</PRE> +All client applications remove the query component from the base URL +before resolving relative URLs. However, some applications fail to +separate the reference's query and/or fragment components from a +relative path before merging it with the base path. This error is +rarely noticed, since typical usage of a fragment never includes the +hierarchy ("/") character, and the query component is not normally +used within relative references. +<PRE> +<a href="g?y/./x">g?y/./x</a> = http://a/b/c/g?y/./x [R,X,5] + http://a/b/c/g?y/x [1,2,3,4] + +<a href="g?y/../x">g?y/../x</a> = http://a/b/c/g?y/../x [R,X,5] + http://a/b/c/x [1,2,3,4] + +<a href="g#s/./x">g#s/./x</a> = http://a/b/c/g#s/./x [R,X,2,3,4,5] + http://a/b/c/g#s/x [1] + +<a href="g#s/../x">g#s/../x</a> = http://a/b/c/g#s/../x [R,X,2,3,4,5] + http://a/b/c/x [1] +</PRE> + Some parsers allow the scheme name to be present in a relative URI if + it is the same as the base URI scheme. This is considered to be a + loophole in prior specifications of partial URI [RFC1630]. Its use + should be avoided. +<PRE> +<a href="http:g">http:g</a> = http:g [R,X,5] + | http://a/b/c/g [1,2,3,4] (ok for compat.) + +<a href="http:">http:</a> = http: [R,X,5] + http://a/b/c/ [1] + http://a/b/c/d;p?q [2,3,4] +</PRE> +</BODY></HTML> diff --git a/t/roytest2.html b/t/roytest2.html new file mode 100644 index 0000000..3906f4e --- /dev/null +++ b/t/roytest2.html @@ -0,0 +1,100 @@ +<HTML><HEAD> +<TITLE>Examples of Resolving Relative URLs, Part 2</TITLE> +<BASE href="http://a/b/c/d;p?q=1/2"> +</HEAD><BODY> +<H1>Examples of Resolving Relative URLs, Part 2</H1> + +This document has an embedded base URL of +<PRE> + Content-Base: http://a/b/c/d;p?q=1/2 +</PRE> +the relative URLs should be resolved as shown below. In this test page, +I am particularly interested in testing whether "/" in query information +is or is not treated as part of the path hierarchy. +<P> +I will need your help testing the examples on multiple browsers. +What you need to do is point to the example anchor and compare it to the +resolved URL in your browser (most browsers have a feature by which you +can see the resolved URL at the bottom of the window/screen when the anchor +is active). + +<H2>Tested Clients and Client Libraries</H2> + +<DL COMPACT> +<DT>[R] +<DD>RFC 2396 (the right way to parse) +<DT>[X] +<DD>RFC 1808 +<DT>[1] +<DD>Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +<DT>[2] +<DD>Lynx/2.7.1 libwww-FM/2.14 +<DT>[3] +<DD>MSIE 3.01; Windows 95 +<DT>[4] +<DD>NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) libwww/2.12 +</DL> + +<H3>Synopsis</H3> + +RFC 1808 specified that the "/" character within query information +does not affect the hierarchy within URL parsing. It would appear that +it does in current practice, but only within the relative path after +it is attached to the base path. In other words, the base URL's query +information is being stripped off before any relative resolution, but +some parsers fail to separate the query information from the relative +path.<P> + +We have decided that this behavior is due to an oversight in the original +libwww implementation, and it is better to correct the oversight in future +parsers than it is to make a nonsensical standard. A note has been added +to the URI draft to account for the differences in implementations. This should +have no impact on current practice since unescaped "/" is rarely (if ever) +used within the query part of a URL, and query parts themselves are rarely +used with relative URLs. + +<H2>Examples</H2> +<PRE> + RESULTS from + +<a href="g">g</a> = http://a/b/c/g [R,X,1,2,3,4] + +<a href="./g">./g</a> = http://a/b/c/g [R,X,1,2,3,4] + +<a href="g/">g/</a> = http://a/b/c/g/ [R,X,1,2,3,4] + +<a href="/g">/g</a> = http://a/g [R,X,1,2,3,4] + +<a href="//g">//g</a> = http://g [R,X,1,2,3,4] + +<a href="?y">?y</a> = http://a/b/c/?y [R,1,2,3,4] + http://a/b/c/d;p?y [X] + +<a href="g?y">g?y</a> = http://a/b/c/g?y [R,X,1,2,3,4] + +<a href="g?y/./x">g?y/./x</a> = http://a/b/c/g?y/./x [R,X] + http://a/b/c/g?y/x [1,2,3,4] + +<a href="g?y/../x">g?y/../x</a> = http://a/b/c/g?y/../x [R,X] + http://a/b/c/x [1,2,3,4] + +<a href="g#s">g#s</a> = http://a/b/c/g#s [R,X,1,2,3,4] + +<a href="g#s/./x">g#s/./x</a> = http://a/b/c/g#s/./x [R,X,2,3,4] + http://a/b/c/g#s/x [1] + +<a href="g#s/../x">g#s/../x</a> = http://a/b/c/g#s/../x [R,X,2,3,4] + http://a/b/c/x [1] + +<a href="./">./</a> = http://a/b/c/ [R,X,1,2,3,4] + +<a href="../">../</a> = http://a/b/ [R,X,1,2,3,4] + +<a href="../g">../g</a> = http://a/b/g [R,X,1,2,3,4] + +<a href="../../">../../</a> = http://a/ [R,X,1,2,3,4] + +<a href="../../g">../../g</a> = http://a/g [R,X,1,2,3,4] + +</PRE> +</BODY></HTML> diff --git a/t/roytest3.html b/t/roytest3.html new file mode 100644 index 0000000..699558f --- /dev/null +++ b/t/roytest3.html @@ -0,0 +1,89 @@ +<HTML><HEAD> +<TITLE>Examples of Resolving Relative URLs, Part 3</TITLE> +<BASE href="http://a/b/c/d;p=1/2?q"> +</HEAD><BODY> +<H1>Examples of Resolving Relative URLs, Part 3</H1> + +This document has an embedded base URL of +<PRE> + Content-Base: http://a/b/c/d;p=1/2?q +</PRE> +the relative URLs should be resolved as shown below. For this test page, +I am particularly interested in testing whether "/" in parameters is or is not +treated as part of the path hierarchy. +<P> +I will need your help testing the examples on multiple browsers. +What you need to do is point to the example anchor and compare it to the +resolved URL in your browser (most browsers have a feature by which you +can see the resolved URL at the bottom of the window/screen when the anchor +is active). + +<H2>Tested Clients and Client Libraries</H2> + +<DL COMPACT> +<DT>[R] +<DD>RFC 2396 (the right way to parse) +<DT>[X] +<DD>RFC 1808 +<DT>[1] +<DD>Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +<DT>[2] +<DD>Lynx/2.7.1 libwww-FM/2.14 +<DT>[3] +<DD>MSIE 3.01; Windows 95 +<DT>[4] +<DD>NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) libwww/2.12 +</DL> + +<H3>Synopsis</H3> + +RFC 1808 specified that the "/" character within parameter information +does not affect the hierarchy within URL parsing. It would appear that +it does in current practice. This implies that the parameters should +be part of each path segment and not outside the path. The URI draft has +been written accordingly. + +<H2>Examples</H2> +<PRE> + RESULTS from + +<a href="g">g</a> = http://a/b/c/d;p=1/g [R,1,2,3,4] + http://a/b/c/g [X] + +<a href="./g">./g</a> = http://a/b/c/d;p=1/g [R,1,2,3,4] + http://a/b/c/g [X] + +<a href="g/">g/</a> = http://a/b/c/d;p=1/g/ [R,1,2,3,4] + http://a/b/c/g/ [X] + +<a href="g?y">g?y</a> = http://a/b/c/d;p=1/g?y [R,1,2,3,4] + http://a/b/c/g?y [X] + +<a href=";x">;x</a> = http://a/b/c/d;p=1/;x [R,1,2,3,4] + http://a/b/c/d;x [X] + +<a href="g;x">g;x</a> = http://a/b/c/d;p=1/g;x [R,1,2,3,4] + http://a/b/c/g;x [X] + +<a href="g;x=1/./y">g;x=1/./y</a> = http://a/b/c/d;p=1/g;x=1/y [R,1,2,3,4] + http://a/b/c/g;x=1/./y [X] + +<a href="g;x=1/../y">g;x=1/../y</a> = http://a/b/c/d;p=1/y [R,1,2,3,4] + http://a/b/c/g;x=1/../y [X] + +<a href="./">./</a> = http://a/b/c/d;p=1/ [R,1,2,3,4] + http://a/b/c/ [X] + +<a href="../">../</a> = http://a/b/c/ [R,1,2,3,4] + http://a/b/ [X] + +<a href="../g">../g</a> = http://a/b/c/g [R,1,2,3,4] + http://a/b/g [X] + +<a href="../../">../../</a> = http://a/b/ [R,1,2,3,4] + http://a/ [X] + +<a href="../../g">../../g</a> = http://a/b/g [R,1,2,3,4] + http://a/g [X] +</PRE> +</BODY></HTML> diff --git a/t/roytest4.html b/t/roytest4.html new file mode 100644 index 0000000..160554c --- /dev/null +++ b/t/roytest4.html @@ -0,0 +1,98 @@ +<HTML><HEAD> +<TITLE>Examples of Resolving Relative URLs, Part 4</TITLE> +<BASE href="fred:///s//a/b/c"> +</HEAD><BODY> +<H1>Examples of Resolving Relative URLs, Part 4</H1> + +This document has an embedded base URL of +<PRE> + Content-Base: fred:///s//a/b/c +</PRE> +in order to test a notion that Tim Berners-Lee mentioned regarding +the ability of URIs to have a triple-slash (or even more slashes) +to indicate higher levels of hierarchy than those already used by URLs. + +<H2>Tested Clients and Client Libraries</H2> + +<DL COMPACT> +<DT>[R] +<DD>RFC 2396 (the right way to parse) +<DT>Tim +<DD>Tim Berners-Lee's proposed interpretation +<DT>[1] +<DD>Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +<DT>[2] +<DD>Lynx/2.7.1 libwww-FM/2.14 +<DT>[3] +<DD>MSIE 3.01; Windows 95 +<DT>[4] +<DD>NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) +</DL> + +<H3>Synopsis</H3> + +RFC 1808 specified that the highest level for relative URLs is indicated +by a double-slash "//", and therefore that any triple-slash would be +considered a null site component, rather than a higher-level component +than the site component (as proposed by Tim).<P> + +The URI draft assumes that a triple-slash means an empty site component. +Netscape Navigator behaves irrationally, apparently because their parser +is scheme-dependent and therefore doesn't do the hierarchical parsing that +would be expected. Oddly, Lynx seems to straddle both sides. + +<H2>Examples</H2> +<PRE> + RESULTS from + +<a href="g:h">g:h</a> = g:h [R,Tim,2,3] + fred:///s//a/b/g:h [1] + +<a href="g">g</a> = fred:///s//a/b/g [R,Tim,1,2,3] + +<a href="./g">./g</a> = fred:///s//a/b/g [R,Tim,2,3] + fred:///s//a/b/./g [1] + +<a href="g/">g/</a> = fred:///s//a/b/g/ [R,Tim,1,2,3] + +<a href="/g">/g</a> = fred:///g [R,1,2,3] + fred:///s//a/g [Tim] + +<a href="//g">//g</a> = fred://g [R,1,2,3] + fred:///s//g [Tim] + +<a href="//g/x">//g/x</a> = fred://g/x [R,1,2,3] + fred:///s//g/x [Tim] + +<a href="///g">///g</a> = fred:///g [R,Tim,1,2,3] + +<a href="./">./</a> = fred:///s//a/b/ [R,Tim,2,3] + fred:///s//a/b/./ [1] + +<a href="../">../</a> = fred:///s//a/ [R,Tim,2,3] + fred:///s//a/b/../ [1] + +<a href="../g">../g</a> = fred:///s//a/g [R,Tim,2,3] + fred:///s//a/b/../g [1] + +<a href="../../">../../</a> = fred:///s// [R] + fred:///s//a/../ [Tim,2] + fred:///s//a/b/../../ [1] + fred:///s//a/ [3] + +<a href="../../g">../../g</a> = fred:///s//g [R] + fred:///s//a/../g [Tim,2] + fred:///s//a/b/../../g [1] + fred:///s//a/g [3] + +<a href="../../../g">../../../g</a> = fred:///s/g [R] + fred:///s//a/../../g [Tim,2] + fred:///s//a/b/../../../g [1] + fred:///s//a/g [3] + +<a href="../../../../g">../../../../g</a> = fred:///g [R] + fred:///s//a/../../../g [Tim,2] + fred:///s//a/b/../../../../g [1] + fred:///s//a/g [3] +</PRE> +</BODY></HTML> diff --git a/t/roytest5.html b/t/roytest5.html new file mode 100644 index 0000000..1b24361 --- /dev/null +++ b/t/roytest5.html @@ -0,0 +1,92 @@ +<HTML><HEAD> +<TITLE>Examples of Resolving Relative URLs, Part 5</TITLE> +<BASE href="http:///s//a/b/c"> +</HEAD><BODY> +<H1>Examples of Resolving Relative URLs, Part 5</H1> + +This document has an embedded base URL of +<PRE> + Content-Base: http:///s//a/b/c +</PRE> +in order to test a notion that Tim Berners-Lee mentioned regarding +the ability of URIs to have a triple-slash (or even more slashes) +to indicate higher levels of hierarchy than those already used by URLs. +This is the same as Part 4, except that the scheme "fred" is replaced +with "http" for clients that stupidly change their parsing behavior +based on the scheme name. + +<H2>Tested Clients and Client Libraries</H2> + +<DL COMPACT> +<DT>[R] +<DD>RFC 2396 (the right way to parse) +<DT>Tim +<DD>Tim Berners-Lee's proposed interpretation +<DT>[1] +<DD>Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +<DT>[2] +<DD>Lynx/2.7.1 libwww-FM/2.14 +<DT>[3] +<DD>MSIE 3.01; Windows 95 +<DT>[4] +<DD>NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) +</DL> + +<H3>Synopsis</H3> + +RFC 1808 specified that the highest level for relative URLs is indicated +by a double-slash "//", and therefore that any triple-slash would be +considered a null site component, rather than a higher-level component +than the site component (as proposed by Tim).<P> + +Draft 09 assumes that a triple-slash means an empty site component, +as does Netscape Navigator if the scheme is known. +Oddly, Lynx seems to straddle both sides. + +<H2>Examples</H2> +<PRE> + RESULTS from + +<a href="g:h">g:h</a> = g:h [R,Tim,2,3] + http:///s//a/b/g:h [1] + +<a href="g">g</a> = http:///s//a/b/g [R,Tim,1,2,3] + +<a href="./g">./g</a> = http:///s//a/b/g [R,Tim,1,2,3] + +<a href="g/">g/</a> = http:///s//a/b/g/ [R,Tim,1,2,3] + +<a href="/g">/g</a> = http:///g [R,1,2,3] + http:///s//a/g [Tim] + +<a href="//g">//g</a> = http://g [R,1,2,3] + http:///s//g [Tim] + +<a href="//g/x">//g/x</a> = http://g/x [R,1,2,3] + http:///s//g/x [Tim] + +<a href="///g">///g</a> = http:///g [R,Tim,1,2,3] + +<a href="./">./</a> = http:///s//a/b/ [R,Tim,1,2,3] + +<a href="../">../</a> = http:///s//a/ [R,Tim,1,2,3] + +<a href="../g">../g</a> = http:///s//a/g [R,Tim,1,2,3] + +<a href="../../">../../</a> = http:///s// [R,1] + http:///s//a/../ [Tim,2] + http:///s//a/ [3] + +<a href="../../g">../../g</a> = http:///s//g [R,1] + http:///s//a/../g [Tim,2] + http:///s//a/g [3] + +<a href="../../../g">../../../g</a> = http:///s/g [R,1] + http:///s//a/../../g [Tim,2] + http:///s//a/g [3] + +<a href="../../../../g">../../../../g</a> = http:///g [R,1] + http:///s//a/../../../g [Tim,2] + http:///s//a/g [3] +</PRE> +</BODY></HTML> diff --git a/t/rsync.t b/t/rsync.t new file mode 100644 index 0000000..01e91d7 --- /dev/null +++ b/t/rsync.t @@ -0,0 +1,23 @@ +use strict; +use warnings; + +print "1..4\n"; + +use URI; + +my $u = URI->new('rsync://gisle@perl.com/foo/bar'); + +print "not " unless $u->user eq "gisle"; +print "ok 1\n"; + +print "not " unless $u->port eq 873; +print "ok 2\n"; + +print "not " unless $u->path eq "/foo/bar"; +print "ok 3\n"; + +$u->port(8730); + +print "not " unless $u eq 'rsync://gisle@perl.com:8730/foo/bar'; +print "ok 4\n"; + diff --git a/t/rtsp.t b/t/rtsp.t new file mode 100644 index 0000000..208b63b --- /dev/null +++ b/t/rtsp.t @@ -0,0 +1,43 @@ +use strict; +use warnings; + +print "1..9\n"; + +use URI; + +my $u = URI->new("<rtsp://media.perl.com/fôo.smi/>"); + +#print "$u\n"; +print "not " unless $u eq "rtsp://media.perl.com/f%F4o.smi/"; +print "ok 1\n"; + +print "not " unless $u->port == 554; +print "ok 2\n"; + +# play with port +my $old = $u->port(8554); +print "not " unless $old == 554 && $u eq "rtsp://media.perl.com:8554/f%F4o.smi/"; +print "ok 3\n"; + +$u->port(554); +print "not " unless $u eq "rtsp://media.perl.com:554/f%F4o.smi/"; +print "ok 4\n"; + +$u->port(""); +print "not " unless $u eq "rtsp://media.perl.com:/f%F4o.smi/" && $u->port == 554; +print "ok 5\n"; + +$u->port(undef); +print "not " unless $u eq "rtsp://media.perl.com/f%F4o.smi/"; +print "ok 6\n"; + +print "not " unless $u->host eq "media.perl.com"; +print "ok 7\n"; + +print "not " unless $u->path eq "/f%F4o.smi/"; +print "ok 8\n"; + +$u->scheme("rtspu"); +print "not " unless $u->scheme eq "rtspu"; +print "ok 9\n"; + @@ -0,0 +1,69 @@ +use strict; +use warnings; + +print "1..11\n"; + +use URI; + +my $u = URI->new('sip:phone@domain.ext'); +print "not " unless $u->user eq 'phone' && + $u->host eq 'domain.ext' && + $u->port eq '5060' && + $u eq 'sip:phone@domain.ext'; +print "ok 1\n"; + +$u->host_port('otherdomain.int:9999'); +print "not " unless $u->host eq 'otherdomain.int' && + $u->port eq '9999' && + $u eq 'sip:phone@otherdomain.int:9999'; +print "ok 2\n"; + +$u->port('5060'); +$u = $u->canonical; +print "not " unless $u->host eq 'otherdomain.int' && + $u->port eq '5060' && + $u eq 'sip:phone@otherdomain.int'; +print "ok 3\n"; + +$u->user('voicemail'); +print "not " unless $u->user eq 'voicemail' && + $u eq 'sip:voicemail@otherdomain.int'; +print "ok 4\n"; + +$u = URI->new('sip:phone@domain.ext?Subject=Meeting&Priority=Urgent'); +print "not " unless $u->host eq 'domain.ext' && + $u->query eq 'Subject=Meeting&Priority=Urgent'; +print "ok 5\n"; + +$u->query_form(Subject => 'Lunch', Priority => 'Low'); +my @q = $u->query_form; +print "not " unless $u->host eq 'domain.ext' && + $u->query eq 'Subject=Lunch&Priority=Low' && + @q == 4 && "@q" eq "Subject Lunch Priority Low"; +print "ok 6\n"; + +$u = URI->new('sip:phone@domain.ext;maddr=127.0.0.1;ttl=16'); +print "not " unless $u->host eq 'domain.ext' && + $u->params eq 'maddr=127.0.0.1;ttl=16'; +print "ok 7\n"; + +$u = URI->new('sip:phone@domain.ext?Subject=Meeting&Priority=Urgent'); +$u->params_form(maddr => '127.0.0.1', ttl => '16'); +my @p = $u->params_form; +print "not " unless $u->host eq 'domain.ext' && + $u->query eq 'Subject=Meeting&Priority=Urgent' && + $u->params eq 'maddr=127.0.0.1;ttl=16' && + @p == 4 && "@p" eq "maddr 127.0.0.1 ttl 16"; + +print "ok 8\n"; + +$u = URI->new_abs('sip:phone@domain.ext', 'sip:foo@domain2.ext'); +print "not " unless $u eq 'sip:phone@domain.ext'; +print "ok 9\n"; + +$u = URI->new('sip:phone@domain.ext'); +print "not " unless $u eq $u->abs('http://www.cpan.org/'); +print "ok 10\n"; + +print "not " unless $u eq $u->rel('http://www.cpan.org/'); +print "ok 11\n"; diff --git a/t/sort-hash-query-form.t b/t/sort-hash-query-form.t new file mode 100644 index 0000000..7c6f896 --- /dev/null +++ b/t/sort-hash-query-form.t @@ -0,0 +1,22 @@ +use strict; +use warnings; + +use Test::More; + +# ABSTRACT: Make sure query_form(\%hash) is sorted + +use URI; + +my $base = URI->new('http://example.org/'); + +my $i = 1; + +my $hash = { map { $_ => $i++ } qw( a b c d e f ) }; + +$base->query_form($hash); + +is("$base","http://example.org/?a=1&b=2&c=3&d=4&e=5&f=6", "Query parameters are sorted"); + +done_testing; + + diff --git a/t/split.t b/t/split.t new file mode 100644 index 0000000..34104b8 --- /dev/null +++ b/t/split.t @@ -0,0 +1,59 @@ +use strict; +use warnings; + +print "1..17\n"; + +use URI::Split qw(uri_split uri_join); + +sub j { join("-", map { defined($_) ? $_ : "<undef>" } @_) } + +print "not " unless j(uri_split("p")) eq "<undef>-<undef>-p-<undef>-<undef>"; +print "ok 1\n"; + +print "not " unless j(uri_split("p?q")) eq "<undef>-<undef>-p-q-<undef>"; +print "ok 2\n"; + +print "not " unless j(uri_split("p#f")) eq "<undef>-<undef>-p-<undef>-f"; +print "ok 3\n"; + +print "not " unless j(uri_split("p?q/#f/?")) eq "<undef>-<undef>-p-q/-f/?"; +print "ok 4\n"; + +print "not " unless j(uri_split("s://a/p?q#f")) eq "s-a-/p-q-f"; +print "ok 5\n"; + +print "not " unless uri_join("s", "a", "/p", "q", "f") eq "s://a/p?q#f"; +print "ok 6\n"; + +print "not " unless uri_join("s", "a", "p", "q", "f") eq "s://a/p?q#f"; +print "ok 7\n"; + +print "not " unless uri_join(undef, undef, "", undef, undef) eq ""; +print "ok 8\n"; + +print "not " unless uri_join(undef, undef, "p", undef, undef) eq "p"; +print "ok 9\n"; + +print "not " unless uri_join("s", undef, "p") eq "s:p"; +print "ok 10\n"; + +print "not " unless uri_join("s") eq "s:"; +print "ok 11\n"; + +print "not " unless uri_join() eq ""; +print "ok 12\n"; + +print "not " unless uri_join("s", "a") eq "s://a"; +print "ok 13\n"; + +print "not " unless uri_join("s", "a/b") eq "s://a%2Fb"; +print "ok 14\n"; + +print "not " unless uri_join("s", ":/?#", ":/?#", ":/?#", ":/?#") eq "s://:%2F%3F%23/:/%3F%23?:/?%23#:/?#"; +print "ok 15\n"; + +print "not " unless uri_join(undef, undef, "a:b") eq "a%3Ab"; +print "ok 16\n"; + +print "not " unless uri_join("s", undef, "//foo//bar") eq "s:////foo//bar"; +print "ok 17\n"; diff --git a/t/storable-test.pl b/t/storable-test.pl new file mode 100644 index 0000000..33deb6f --- /dev/null +++ b/t/storable-test.pl @@ -0,0 +1,27 @@ +use strict; +use warnings; +use Storable; + +if (@ARGV && $ARGV[0] eq "store") { + require URI; + require URI::URL; + my $a = { + u => new URI('http://search.cpan.org/'), + }; + print "# store\n"; + store [URI->new("http://search.cpan.org")], 'urls.sto'; +} else { + print "# retrieve\n"; + my $a = retrieve 'urls.sto'; + my $u = $a->[0]; + #use Data::Dumper; print Dumper($a); + + print "not " unless $u eq "http://search.cpan.org"; + print "ok 1\n"; + + print "not " unless $u->scheme eq "http"; + print "ok 2\n"; + + print "not " unless ref($u) eq "URI::http"; + print "ok 3\n"; +} diff --git a/t/storable.t b/t/storable.t new file mode 100644 index 0000000..cf6e65a --- /dev/null +++ b/t/storable.t @@ -0,0 +1,16 @@ +use strict; +use warnings; + +eval { + require Storable; + print "1..3\n"; +}; +if ($@) { + print "1..0 # skipped: Needs the Storable module installed\n"; + exit; +} + +system($^X, "-Iblib/lib", "t/storable-test.pl", "store"); +system($^X, "-Iblib/lib", "t/storable-test.pl", "retrieve"); + +unlink('urls.sto'); diff --git a/t/urn-isbn.t b/t/urn-isbn.t new file mode 100644 index 0000000..d8985f7 --- /dev/null +++ b/t/urn-isbn.t @@ -0,0 +1,62 @@ +use strict; +use warnings; + +eval { + require Business::ISBN; +}; +if ($@) { + print "1..0 # Skipped: Needs the Business::ISBN module installed\n\n"; + print $@; + exit; +} + +print "1..13\n"; + +use URI; +my $u = URI->new("URN:ISBN:0395363411"); + +print "not " unless $u eq "URN:ISBN:0395363411" && + $u->scheme eq "urn" && + $u->nid eq "isbn"; +print "ok 1\n"; + +print "not " unless $u->canonical eq "urn:isbn:0-395-36341-1"; +print "ok 2\n"; + +print "not " unless $u->isbn eq "0-395-36341-1"; +print "ok 3\n"; + +print "not " unless $u->isbn_group_code == 0; +print "ok 4\n"; + +print "not " unless $u->isbn_publisher_code == 395; +print "ok 5\n"; + +print "not " unless $u->isbn13 eq "9780395363416"; +print "ok 6\n"; + +print "not " unless $u->nss eq "0395363411"; +print "ok 7\n"; + +print "not " unless $u->isbn("0-88730-866-x") eq "0-395-36341-1"; +print "ok 8\n"; + +print "not " unless $u->nss eq "0-88730-866-x"; +print "ok 9\n"; + +print "not " unless $u->isbn eq "0-88730-866-X"; +print "ok 10\n"; + +print "not " unless URI::eq("urn:isbn:088730866x", "URN:ISBN:0-88-73-08-66-X"); +print "ok 11\n"; + +# try to illegal ones +$u = URI->new("urn:ISBN:abc"); +print "not " unless $u eq "urn:ISBN:abc"; +print "ok 12\n"; + +print "not " if $u->nss ne "abc" || defined $u->isbn; +print "ok 13\n"; + + + diff --git a/t/urn-oid.t b/t/urn-oid.t new file mode 100644 index 0000000..d35e524 --- /dev/null +++ b/t/urn-oid.t @@ -0,0 +1,24 @@ +use strict; +use warnings; + +print "1..4\n"; + +use URI; + +my $u = URI->new("urn:oid"); + +$u->oid(1..10); + +#print "$u\n"; + +print "not " unless $u eq "urn:oid:1.2.3.4.5.6.7.8.9.10"; +print "ok 1\n"; + +print "not " unless $u->oid eq "1.2.3.4.5.6.7.8.9.10"; +print "ok 2\n"; + +print "not " unless $u->scheme eq "urn" && $u->nid eq "oid"; +print "ok 3\n"; + +print "not " unless $u->oid eq $u->nss; +print "ok 4\n"; diff --git a/t/utf8.t b/t/utf8.t new file mode 100644 index 0000000..1453cfc --- /dev/null +++ b/t/utf8.t @@ -0,0 +1,20 @@ +use strict; +use warnings; + +use utf8; + +use Test::More 'no_plan'; +use URI; + +is(URI->new('http://foobar/mooi€e')->as_string, 'http://foobar/mooi%E2%82%ACe'); + +my $uri = URI->new('http:'); +$uri->query_form("mooi€e" => "mooi€e"); +is( $uri->query, "mooi%E2%82%ACe=mooi%E2%82%ACe" ); +is( ($uri->query_form)[1], "mooi\xE2\x82\xACe" ); + +# RT#70161 +use Encode; +$uri = URI->new(decode_utf8 '?Query=%C3%A4%C3%B6%C3%BC'); +is( ($uri->query_form)[1], "\xC3\xA4\xC3\xB6\xC3\xBC"); +is( decode_utf8(($uri->query_form)[1]), 'äöü'); diff --git a/uri-test b/uri-test new file mode 100755 index 0000000..ca30ef8 --- /dev/null +++ b/uri-test @@ -0,0 +1,58 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; + +sub usage { + my $prog = $0; $prog =~ s,.*/,,; + die "Usage: $prog <uri> [<method> [<args>]...]\n"; +} + +usage() unless @ARGV; +my $uri = shift; +my $orig = $uri; + +require URI; + +my @ctor_arg = ($uri); +push(@ctor_arg, shift) while @ARGV && $ARGV[0] =~ s/^\+//; + +$uri = URI->new(@ctor_arg); + +if (@ARGV) { + my $method = shift; + my $list_context = ($method =~ s/^\@//); + #print "URI->new(\"$uri\")->$method ==> "; + for (@ARGV) { + undef($_) if $_ eq "UNDEF"; + } + + my @result; + if ($list_context) { + @result = $uri->$method(@ARGV); + } else { + @result = scalar($uri->$method(@ARGV)); + } + + for (@result) { + if (defined) { + $_ = "«$_»" if /^\s*$/; + } else { + $_ = "<undef>"; + } + } + print join(" ", @result), "\n"; +} +print "$uri\n" unless $orig eq $uri; +exit; + +# Some extra methods that might be nice + +sub UNIVERSAL::class { ref($_[0]) } + +sub UNIVERSAL::dump { + require Data::Dumper; + my $d = Data::Dumper->Dump(\@_, ["self", "arg1", "arg2", "arg3", "arg4"]); + chomp($d); + $d; +} |