From 9165b237ad8fae18b36d4d40d6e2ccfde7b136c7 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Sat, 25 Jul 2015 01:06:42 +0000 Subject: URI-1.69 --- Changes | 1009 ++++++++++++++++++++++++++++++++++++++++ LICENSE | 379 +++++++++++++++ MANIFEST | 109 +++++ META.json | 91 ++++ META.yml | 60 +++ Makefile.PL | 138 ++++++ README | 667 ++++++++++++++++++++++++++ lib/URI.pm | 1155 ++++++++++++++++++++++++++++++++++++++++++++++ lib/URI/Escape.pm | 220 +++++++++ lib/URI/Heuristic.pm | 253 ++++++++++ lib/URI/IRI.pm | 47 ++ lib/URI/QueryParam.pm | 207 +++++++++ lib/URI/Split.pm | 97 ++++ lib/URI/URL.pm | 303 ++++++++++++ lib/URI/WithBase.pm | 174 +++++++ lib/URI/_foreign.pm | 10 + lib/URI/_generic.pm | 256 ++++++++++ lib/URI/_idna.pm | 91 ++++ lib/URI/_ldap.pm | 140 ++++++ lib/URI/_login.pm | 13 + lib/URI/_punycode.pm | 203 ++++++++ lib/URI/_query.pm | 97 ++++ lib/URI/_segment.pm | 24 + lib/URI/_server.pm | 166 +++++++ lib/URI/_userpass.pm | 55 +++ lib/URI/data.pm | 142 ++++++ lib/URI/file.pm | 327 +++++++++++++ lib/URI/file/Base.pm | 84 ++++ lib/URI/file/FAT.pm | 27 ++ lib/URI/file/Mac.pm | 121 +++++ lib/URI/file/OS2.pm | 32 ++ lib/URI/file/QNX.pm | 20 + lib/URI/file/Unix.pm | 58 +++ lib/URI/file/Win32.pm | 87 ++++ lib/URI/ftp.pm | 46 ++ lib/URI/gopher.pm | 97 ++++ lib/URI/http.pm | 27 ++ lib/URI/https.pm | 14 + lib/URI/ldap.pm | 120 +++++ lib/URI/ldapi.pm | 29 ++ lib/URI/ldaps.pm | 14 + lib/URI/mailto.pm | 73 +++ lib/URI/mms.pm | 12 + lib/URI/news.pm | 71 +++ lib/URI/nntp.pm | 10 + lib/URI/pop.pm | 71 +++ lib/URI/rlogin.pm | 12 + lib/URI/rsync.pm | 14 + lib/URI/rtsp.pm | 12 + lib/URI/rtspu.pm | 12 + lib/URI/sftp.pm | 10 + lib/URI/sip.pm | 85 ++++ lib/URI/sips.pm | 14 + lib/URI/snews.pm | 14 + lib/URI/ssh.pm | 16 + lib/URI/telnet.pm | 12 + lib/URI/tn3270.pm | 12 + lib/URI/urn.pm | 100 ++++ lib/URI/urn/isbn.pm | 103 +++++ lib/URI/urn/oid.pm | 20 + t/abs.t | 173 +++++++ t/clone.t | 21 + t/cwd.t | 15 + t/data.t | 111 +++++ t/distmanifest.t | 11 + t/escape-char.t | 29 ++ t/escape.t | 37 ++ t/file.t | 65 +++ t/ftp.t | 53 +++ t/generic.t | 219 +++++++++ t/gopher.t | 46 ++ t/heuristic.t | 138 ++++++ t/http.t | 66 +++ t/idna.t | 14 + t/iri.t | 76 +++ t/ldap.t | 119 +++++ t/mailto.t | 48 ++ t/mix.t | 80 ++++ t/mms.t | 38 ++ t/news.t | 51 ++ t/num_eq.t | 16 + t/old-absconf.t | 38 ++ t/old-base.t | 978 +++++++++++++++++++++++++++++++++++++++ t/old-file.t | 81 ++++ t/old-relbase.t | 37 ++ t/path-segments.t | 33 ++ t/pop.t | 50 ++ t/punycode.t | 56 +++ t/query-param.t | 71 +++ t/query.t | 81 ++++ t/rel.t | 21 + t/rfc2732.t | 59 +++ t/roy-test.t | 44 ++ t/roytest1.html | 194 ++++++++ t/roytest2.html | 100 ++++ t/roytest3.html | 89 ++++ t/roytest4.html | 98 ++++ t/roytest5.html | 92 ++++ t/rsync.t | 23 + t/rtsp.t | 43 ++ t/sip.t | 69 +++ t/sort-hash-query-form.t | 22 + t/split.t | 59 +++ t/storable-test.pl | 27 ++ t/storable.t | 16 + t/urn-isbn.t | 62 +++ t/urn-oid.t | 24 + t/utf8.t | 20 + uri-test | 58 +++ 109 files changed, 11853 insertions(+) create mode 100644 Changes create mode 100644 LICENSE create mode 100644 MANIFEST create mode 100644 META.json create mode 100644 META.yml create mode 100644 Makefile.PL create mode 100644 README create mode 100644 lib/URI.pm create mode 100644 lib/URI/Escape.pm create mode 100644 lib/URI/Heuristic.pm create mode 100644 lib/URI/IRI.pm create mode 100644 lib/URI/QueryParam.pm create mode 100644 lib/URI/Split.pm create mode 100644 lib/URI/URL.pm create mode 100644 lib/URI/WithBase.pm create mode 100644 lib/URI/_foreign.pm create mode 100644 lib/URI/_generic.pm create mode 100644 lib/URI/_idna.pm create mode 100644 lib/URI/_ldap.pm create mode 100644 lib/URI/_login.pm create mode 100644 lib/URI/_punycode.pm create mode 100644 lib/URI/_query.pm create mode 100644 lib/URI/_segment.pm create mode 100644 lib/URI/_server.pm create mode 100644 lib/URI/_userpass.pm create mode 100644 lib/URI/data.pm create mode 100644 lib/URI/file.pm create mode 100644 lib/URI/file/Base.pm create mode 100644 lib/URI/file/FAT.pm create mode 100644 lib/URI/file/Mac.pm create mode 100644 lib/URI/file/OS2.pm create mode 100644 lib/URI/file/QNX.pm create mode 100644 lib/URI/file/Unix.pm create mode 100644 lib/URI/file/Win32.pm create mode 100644 lib/URI/ftp.pm create mode 100644 lib/URI/gopher.pm create mode 100644 lib/URI/http.pm create mode 100644 lib/URI/https.pm create mode 100644 lib/URI/ldap.pm create mode 100644 lib/URI/ldapi.pm create mode 100644 lib/URI/ldaps.pm create mode 100644 lib/URI/mailto.pm create mode 100644 lib/URI/mms.pm create mode 100644 lib/URI/news.pm create mode 100644 lib/URI/nntp.pm create mode 100644 lib/URI/pop.pm create mode 100644 lib/URI/rlogin.pm create mode 100644 lib/URI/rsync.pm create mode 100644 lib/URI/rtsp.pm create mode 100644 lib/URI/rtspu.pm create mode 100644 lib/URI/sftp.pm create mode 100644 lib/URI/sip.pm create mode 100644 lib/URI/sips.pm create mode 100644 lib/URI/snews.pm create mode 100644 lib/URI/ssh.pm create mode 100644 lib/URI/telnet.pm create mode 100644 lib/URI/tn3270.pm create mode 100644 lib/URI/urn.pm create mode 100644 lib/URI/urn/isbn.pm create mode 100644 lib/URI/urn/oid.pm create mode 100644 t/abs.t create mode 100644 t/clone.t create mode 100644 t/cwd.t create mode 100644 t/data.t create mode 100644 t/distmanifest.t create mode 100644 t/escape-char.t create mode 100644 t/escape.t create mode 100644 t/file.t create mode 100644 t/ftp.t create mode 100644 t/generic.t create mode 100644 t/gopher.t create mode 100644 t/heuristic.t create mode 100644 t/http.t create mode 100644 t/idna.t create mode 100644 t/iri.t create mode 100644 t/ldap.t create mode 100644 t/mailto.t create mode 100644 t/mix.t create mode 100644 t/mms.t create mode 100644 t/news.t create mode 100644 t/num_eq.t create mode 100644 t/old-absconf.t create mode 100644 t/old-base.t create mode 100644 t/old-file.t create mode 100644 t/old-relbase.t create mode 100755 t/path-segments.t create mode 100644 t/pop.t create mode 100644 t/punycode.t create mode 100644 t/query-param.t create mode 100644 t/query.t create mode 100644 t/rel.t create mode 100644 t/rfc2732.t create mode 100644 t/roy-test.t create mode 100644 t/roytest1.html create mode 100644 t/roytest2.html create mode 100644 t/roytest3.html create mode 100644 t/roytest4.html create mode 100644 t/roytest5.html create mode 100644 t/rsync.t create mode 100644 t/rtsp.t create mode 100644 t/sip.t create mode 100644 t/sort-hash-query-form.t create mode 100644 t/split.t create mode 100644 t/storable-test.pl create mode 100644 t/storable.t create mode 100644 t/urn-isbn.t create mode 100644 t/urn-oid.t create mode 100644 t/utf8.t create mode 100755 uri-test diff --git a/Changes b/Changes new file mode 100644 index 0000000..9c1c90c --- /dev/null +++ b/Changes @@ -0,0 +1,1009 @@ +Revision history for URI + +2015-07-25 Karen Etheridge + + Release 1.69 + + Karen Etheridge: + - add $VERSIONs for all modules that lack them + + Olaf Alders: + - add missing documentation for URI::sftp + - Clarify use of query_param() method + + +2015-06-25 Karen Etheridge + + Release 1.68 + + Kent Fredric: + - Sort hash keys to make generated query predictable + + Slaven Rezic: + - Add new tests for path segments + + Brendan Byrd: + - Add sftp scheme + + +2015-02-24 Karen Etheridge + + Release 1.67 + + Karen Etheridge: + - properly skip author test for normal user installs + + +2015-02-24 Karen Etheridge + + Release 1.66 + + Adam Herzog: + - reorganize .pm files under lib/ (github #20) + + +2014-11-05 Karen Etheridge + + Release 1.65 + + Karen Etheridge: + - add a TO_JSON method, to assist JSON serialization + + +2014-07-13 Karen Etheridge + + Release 1.64 + + Eric Brine: + - better fix for RT#96941, that also works around utf8 bugs on older perls + + +2014-07-13 Karen Etheridge + + Release 1.63 + + Karen Etheridge: + - mark utf8-related test failures on older perls caused by recent string + parsing changes as TODO (RT#97177, RT#96941) + + +2014-07-12 Karen Etheridge + + Release 1.62 + + Karen Etheridge (2): + - use strict and warnings in all modules, tests and scripts + - remove all remaining uses of "use vars" + + Eric Brine: + - fixed new "\C is deprecated in regex" warning in 5.21.2 (RT#96941) + + +2014-07-01 Karen Etheridge + + Release 1.61 + + David Schmidt: + Fix test failure if local hostname is 'foo' [RT#75519] + + Gisle Aas (2): + New 'has_recognized_scheme' interface [RT#71204] + Interfaces that return a single value now return undef rather than an + empty list in list context + + Slaven Rezic: + Fix bad regex when parsing hostnames + + Piotr Roszatycki: + Preferentially use $ENV{TMPDIR} for temporary test files over /tmp + (fixes tests on Android) + + + +2012-03-25 Gisle Aas + + Release 1.60 + + Gisle Aas (3): + Merge pull request #4 from hiratara/fix-repourl + Updated repository URL + Avoid failure if the local hostname is 'foo' [RT#75519] + + Masahiro Honma (1): + Fix the URL of the repository. + + Matt Lawrence (1): + Do not reverse the order of new parameters + + Peter Rabbitson (1): + Fix RT#59274 - courtesy of a stupid 5.8.[12] join bug + + + + +2011-08-15 Gisle Aas + + Release 1.59 + + Make sure accessor methods don't return utf8::upgraded() bytes + for URLs initialized from Unicode strings. + + Version number increments. + + Documentation tweaks. + + + +2011-01-23 Gisle Aas + + Release 1.58 + + This release reverts the patch in 1.57 that made query_form distingush + between empty and undef values. It broke stuff. [RT#62708] + + + +2011-01-22 Gisle Aas + + Release 1.57 + + Mark Stosberg (8): + typo fix: s/do deal/to deal/ + best practice: s/foreach /for / + Whitespace: fix inconsistent use of tabs vs spaces + Code style: fix inconsistency with subroutine braces at the end of the line vs below it. + Modernize: s/use vars/our/ ... since we require 5.6 as a minimum version now + Whitespace: fix indentation so blocks are consistently indented + Add formal terms "Percent-encode" and "Percent-decode" to the NAME and description to match the RFC + Drop support for Perl < 5.8.1 Perl 5.8 was released almost 10 years ago. It's time. + + Gisle Aas (6): + Convert test to use Test::More + Adjust tests for query_form + Avoid "Use of uninitialized value"-noise from query_form + State test dependencies [RT#61538] + We also depend on ExtUtils::MakeMaker + State 5.8 dependency in the META.yml file + + Ville Skyttä (2): + Guess HTTPS and FTP from URI::Heuristic input with port but no scheme. + Try harder to guess scheme from hostnames besides just "$scheme.*" ones. + + John Miller (1): + Distingush between empty and undef values in query_form [RT#62708] + + + +2010-10-06 Gisle Aas + + Release 1.56 + + Don't depend on DNS for the heuristics test + + + +2010-09-01 Gisle Aas + + Release 1.55 + + Gisle Aas (2): + Treat ? as a reserved character in file: URIs + " is not a URI character [RT#56421] + + Torsten Frtsch (1): + Avoid test failure unless defined $Config{useperlio} + + + +2010-03-31 Gisle Aas + + Release 1.54 + + Alex Kapranoff (1): + Fix heuristic test fails on hosts in .su (or .uk) domains [RT#56135] + + + +2010-03-14 Gisle Aas + + Release 1.53 + + Ville Skyttä (6): + Remove unneeded execute permissions. + Add $uri->secure() method. + Documentation and comment spelling fixes. + Fix heuristics when COUNTRY is set to "gb". + Use HTTP_ACCEPT_LANGUAGE, LC_ALL, and LANG in country heuristics. + POD linking improvements. + + Michael G. Schwern (2): + Rewrite the URI::Escape tests with Test::More + Update URI::Escape for RFC 3986 + + Gisle Aas (1): + Bump MIN_PERL_VERSION to 5.6.1 [RT#54078] + + Salvatore Bonaccorso (1): + Suppress wide caracters warnings in iri.t [RT#53737] + + + +2009-12-30 Gisle Aas + + Release 1.52 + + Gisle Aas (7): + Encode::decode('UTF-8',...) with callback implemented in 2.39 + %%host%% hack can be removed when URI::_server::as_iri works + Don't croak on IRIs that can't be IDNA encoded + IDNA roundtrip test on wrong variable + Check behaviour when feeded URI constructor Latin-1 chars + Add some test examples from draft-duerst-iri-bis.txt + Need to recognize lower case hex digits as well + + + +2009-11-23 Gisle Aas + + Release 1.51 + + Fixup a test that was broken on Windows + + + +2009-11-21 Gisle Aas + + Release 1.50 + + The main news in this release is the initial attempt at providing + support to IRIs. URI objects now support the 'as_iri' and 'ihost' + methods. + + Gisle Aas (28): + Added more tests for setting IPv6 addresses using the host method + Document how the host methods deal with IPv6 addresses + A "test case" to start IDNA prototype from + Escape IDNA hostnames + Introduce the as_unicode method + Make as_unicode undo punycode for server URLs + An IRI class might be helpful (RFC 3987) + Must punycode each part of the domain name separately + Include initial private Punycode module + Get URI::_punycode working + punycode of plain ascii should not edit with "-" + Some more tests from RFC 3492 + Add private URI::_idna module based on encodings/idna.py + Start using URI::_idna for encoding of URIs + Avoid various use of undef warnings + Fix test affected by IDNA + Keep reference to IDNA::Punycode in the URI::_punycode docs + Ensure upgraded strings as input + Update manifest with the new idna/punycode files + Rename as_unicde to as_iri + draft-duerst-iri-bis-07: The proposed RFC 3987 update + Load Encode when its used + Rename host_unicode as ihost + Add basic iri test + Hack to make as_iri turn A-labels into U-labels + Make as_iri leave escapes not forming valid UTF-8 sequences + Merge branch 'iri' + Don't include RFCs in the cpan tarball + + Michael G. Schwern (3): + Fix != overloading to match == + Note that mailto does not contain a host() and this is not a bug. + Strip brackets off IPv6 hosts [RT#34309] + + + +2009-08-14 Gisle Aas + + Release 1.40 + + Even stricter test for working DNS, 2nd try. + + + +2009-08-13 Gisle Aas + + Release 1.39 + + Even stricter test for working DNS, hopefully this gets rid of the rest of + the heuristics.t failures. + + + +2009-05-27 Gisle Aas + + Release 1.38 + + Ville Skyttä (3): + Spelling fixes. + + Tatsuhiko Miyagawa (1): + skip DNS test if wildcard domain catcher (e.g. OpenDNS) is there + + Gisle Aas (1): + Avoid "Insecure $ENV{PATH} while running with -T switch" error with perl-5.6. + + + +2008-06-16 Gisle Aas + + Release 1.37 + + Gisle Aas (1): + Support ";" delimiter in $u->query_form + + Jan Dubois (1): + We get different test result when www.perl.com doesn't resolve. + + Kenichi Ishigaki (1): + URI::Heuristic didn't work for generic country code [RT#35156] + + + +2008-04-03 Gisle Aas + + Release 1.36 + + : Escape Unicode strings as UTF-8. + + Bjoern Hoehrmann : fixed URL encoded data: URLs + + GAAS: uri_escape_utf8() now exported by default as documented. + + BDFOY: Test fails with Business::ISBN installed [RT#33220] + + JDHEDDEN: lc(undef) reports warning in blead [RT#32742] + + GEOFFR: add some tests for gopher URIs [RT#29211] + + + +2004-11-05 Gisle Aas + + Release 1.35 + + Documentation update. + + Simplified uri_escape_utf8 implementation. No need to load the + Encode module. Contributed by Alexey Tourbin. + + Work around bug in perl-5.6.0 that made t/query.t fail. + + + +2004-10-05 Gisle Aas + + Release 1.34 + + URI->canonical will now always unescape any escaped unreserved + chars. Previously this only happened for the http and https scheme. + Patch contributed by Eric Promislow . + + + +2004-09-19 Gisle Aas + + Release 1.33 + + URI::file->canonical will now try to change the 'authority' + to the default one. + + Fix heuristic test. Apparently www.perl.co.uk is no more. + + + +2004-09-07 Gisle Aas + + Release 1.32 + + Introduce $URI::file::DEFAULT_AUTHORITY which control what + authority string to use for absolute file URIs. Its value + default to "" which produce file URIs that better interoperates + with other implementations. The old mapping behaviour can be + requested by setting this variable to undef. + + + +2004-06-08 Gisle Aas + + Release 1.31 + + Added uri_escape_utf8() function to URI::Escape module. + + Fixed abs/rel behaviour for sip: URIs. Fixed by + Ville Skyttä . + + Avoid croaking on code like $u->query_form(a => { foo => 1 }). + It will still not really do anything useful. + + + +2004-01-14 Gisle Aas + + Release 1.30 + + Documentation fixes by Paul Croome . + + + +2004-01-02 Gisle Aas + + Release 1.29 + + Added support for the ldapi: and ldaps: schemes. + The ldaps: implementation was contributed by Graham Barr. + + Added support for mms: scheme. Contributed by + Dan Sully . + + + +2003-11-30 Gisle Aas + + Release 1.28 + + The query_param_delete() method was not able to delete + the last parameter from a form. Similar problem existing + when deleting via query_param(). Patch by . + + The query_form() method now allow an array or hash + reference to be passed to set the value. This makes it + possible to set the value to an empty form, something that + the old API did not allow. + + The query_keywords() method now allow an array reference + to be passed to set the value. + + + +2003-10-06 Gisle Aas + + Release 1.27 + + The URI module is now less strict about the values accepted + for gopher_type attribute of gopher:-URLs. Patch suggested + by the Net::Gopher author; William G. Davis. + + + +2003-10-03 Gisle Aas + + Release 1.26 + + Help Storable deal with URI objects. Patch contributed + by . + + Fix failure under OS/2. Patch contributed by Ilya Zakharevich. + + + +2003-08-18 Gisle Aas + + Release 1.25 + + Allow literal '@' in userinfo. If there are multiple '@' chars + in the 'authority' component use the last (instead of first) as + the 'userinfo' delimiter. + + Make URI->query_form escape '[' and ']'. These chars where added + to the reserved set in RFC 2732. This also matches MSIE behaviour. + + Silence warning from 'sip' support class. + + + +2003-07-24 Gisle Aas + + Release 1.24 + + Relative URIs that start with the query string directly (i.e. "?q") + are now absolutized as specified in rfc2396bis. See: + http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query + + Added URI::Split module. It's a lightweight module that can be + used to parse and compose URI string to/from its component parts. + + The rel() method will now work from canonical URIs. That allow it + to extract a relative URI in more cases. + + + +2003-01-01 Gisle Aas + + Release 1.23 + + Support for tn3270 URIs. + + Use anchored DNS lookups in URI::Heuristic as suggested + by Malcolm Weir . + + Delay calculation of MY_COUNTRY() in URI::Heuristic. + Patch by Ed Avis . + + Make test suite work for UNC paths. + Patch by Warren Jones . + + + +2002-09-02 Gisle Aas + + Release 1.22 + + Added URI::QueryParam module. It contains some + extra methods to manipulate the query form + key/value pairs. + + Added support for the sip: and sips: URI scheme. + Contributed by Ryan Kereliuk . + + + +2002-08-04 Gisle Aas + + Release 1.21 + + Restore perl-5.004 and perl-5.005 compatibility. + + + +2002-07-18 Gisle Aas + + Release 1.20 + + Direct support for some new schemes urn:, urn:isbn:, + urn:oid:, rtsp:, and rtspu:. The rtsp support was + contributed by Matt Selsky . + + Documentation fix for $URI::ABS_REMOTE_LEADING_DOTS. + CPAN-RT-Bug #1224. + + The host for URI::file was not unescaped. + Patch by Ville Skyttä . + + + +2002-05-09 Gisle Aas + + Release 1.19 + + URI::Heuristic will guess better on strings + like "123.3.3.3:8080/foo". It used to think that + the numbers before ":" was a scheme. + + URI::WithBase will not keep the full history of + any base URI's base URI etc. This used to make + these objects grow into to monsters for some + web spiders. + + URI::URL->new("foo", "bar")->base used to return + a "URI" object. Now an URI::URL object is returned + instead. + + Deal properly with file:///-URIs. + + + +2001-12-30 Gisle Aas + + Release 1.18 + + Added support for ssh: URIs. + Contributed by Jean-Philippe Bouchard + + URI::Escape: Make sure cache is not set when the RE + wouldn't compile. Fix suggested by . + Applied patch as suggested by Randal L. Schwartz. + + Don't try to come up with the e-mail address of the user as + the anonymous password. + Patch by Eduardo Pérez . + + + +2001-09-14 Gisle Aas + + Release 1.17 + + Fixed unescape of %30 in $http_uri->canonical. + + Fixed test failure for t/heuristic.t on cygwin. + + Fixed warning noise from t/old-base.t on bleadperl. + Perl now warns for pack("c*", $i) when $i > 127. + + + +2001-08-27 Gisle Aas + + Release 1.16 + + URI::Escape::uri_escape default has changed. Reserved + characters are now escaped when no second argument is + provided. + + The perl5.004 backwards compatibility patching taking place + in the Makefile.PL should now work for MacPerl. + Patch by KIMURA Takeshi . + + URI::WithBase now overrides the can() method and delegate it to + the URI member. This also affects the URI::URL behaviour. + Patch by Sean M. Burke . + + + +2001-07-19 Gisle Aas + + Release 1.15 + + [This release was made just to document the changes that went + into the (unreleased) URI-1.13 but never made it into this + change-log. There is no functional difference between the 1.14 + and 1.15 release.] + + + +2001-07-18 Gisle Aas + + Release 1.14 + + The module failed on perl5.004 because \z is not supported + in regexps. The Makefile.PL will now try to patch the module + to be compatible. + + + +2001-05-15 Gisle Aas + + Release 1.13 (never made it to CPAN) + + URI.pm now conforms to RFC 2732 which specify how literal IPv6 + addresses are to be included in URLs. + + URI/Escape now allows "/" in the $unsafe pattern argument. + + + +2001-04-23 Gisle Aas + + Release 1.12 + + URI->new($u, $scheme) does no longer fail if given a badly + formatted scheme string. + + URI::WithBase's clone and base method was basically just + broken. This also affected the URI::URL subclass. + The clone() method did not copy the base, and updating + the base with the base method always set it to "1". + + + +2001-02-27 Gisle Aas + + Release 1.11 + + The t/heuristic.t test relied on the fact that 'www.perl.no' + was not registered in DNS. This is no longer true. + The penguins at Bouvet Island will hopefully be ignorant + of Perl forever. + + + +2001-01-10 Gisle Aas + + Release 1.10 + + The $u->query_form method will now escape spaces in + form keys or values as '+' (instead of '%20'). This also + affect the $mailto_uri->header() method. This is actually + the wrong thing to do, but this practise is now even + documented in official places like + http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 + so we might as well follow the stream. + + URI::Heuristic did not work for domain-names with dashes '-' + in them. Fixed. + + Documented that $uri->xxx($1) might not work. + + + +2000-08-16 Gisle Aas + + Release 1.09 + + uri_unescape() did not work when given multiple strings + to decode. Patch by Nicholas Clark . + + + +2000-08-02 Gisle Aas + + Release 1.08 + + ldap URIs now support _scope() and _filter() methods that + don't have default values. Suggested by Graham Barr. + + Incorporated old rejected MSWin32 patch to t/old-base.t. + Hope it works. + + + +2000-06-13 Gisle Aas + + Release 1.07 + + URI::WithBase (and URI::URL) now support $u->new_abs + constructor. + + URI::WithBase->new("foo", "URI::URL") bug fixed. + + + +2000-04-09 Gisle Aas + + Release 1.06 + + Clean test/install on VMS. + Patch by Charles Lane + + + +2000-02-14 Gisle Aas + + Release 1.05 + + QNX file support by Norton Allen . + + Support for rsync:-URI by Dave Beckett + + + +1999-08-03 Gisle Aas + + Release 1.04 + + Avoid testing for defined(@ISA) and defined(%class::). Patch + by Nathan Torkington . + + $uri->abs() did wrong when the fragment contained a "?" + character. + + Typo in URI::ldap spotted by Graham Barr. + + + +1999-06-24 Gisle Aas + + Release 1.03 + + Escape all reserved query characters in the individual components + of $uri->query_form and $uri->query_keywords. + + Make compatibility URI::URL->new("mailto:gisle@aas.no")->netloc + work again. + + + +1999-03-26 Gisle Aas + + Release 1.02 + + Added URI::ldap. Contributed by Graham Barr . + + Documentation update. + + + +1999-03-20 Gisle Aas + + Release 1.01 + + MacOS patches from Paul J. Schinder + + Documentation patch from Michael A. Chase + + + +1998-11-19 Gisle Aas + + Release 1.00 + + Added new URI->new_abs method + + Replaced a few die calls with croak. + + + +1998-10-12 Gisle Aas + + Release 0.90_02 + + Implemented new $uri->host_port method. + + $uri->epath and $uri->equery aliases to make URI::URL + compatibility easier. + + + +1998-09-23 Gisle Aas + + Release 0.90_01 + + New README + + Makefile.PL list MIME::Base64 as PREREQ_PM + + Original $scheme argument not passed to _init() method. + + Automatically add scheme to empty URIs where the scheme + is required: URI->new("", "data") + + Documentation update. + + New URI::URL::strict implementation. + + + +1998-09-22 Gisle Aas + + Release 0.09_02 + + New internal URI::file::* interface. Implemented 8.3 mapping + for "dos". + + Got rid of $URI::STRICT and $URI::DEFAULT_SCHEME + + More documentation. + + + +1998-09-13 Gisle Aas + + Release 0.09_01 + + Use version number with underscore to avoid that the CPAN + indexer hides the URI::URL from libwww-perl that contains + all the documentation. + + Started to document the new modules. + + URI::file->new() escape fix which allow Mac file names like + ::.. to be treated as they should (I think). + + + +1998-09-12 Gisle Aas + + Release 0.09 + + Included URI::Escape and URI::Heuristic from LWP. URI::Escape + updated with new default set of characters to escape (according + to RFC 2396) and a faster uri_unescape() function. URI::Heuristic + updated with a new function that returns an URI object. + + First argument to URI->new is always treated as a string now. + + URI->new("", URI::WithBase("foo:")) now works. It returns an + URI::WithBase object. + + Included Roy T. Fielding's URI parsing/abs tests from + . We did in fact agree + with RFC 2396 on all tests. + + Allow authority "A|" in Win32 file:-URIs to denote A:. Treat + escaped chars. + + + +1998-09-10 Gisle Aas + + Release 0.08 + + Implemented transformations between various file: URIs and + actual file names. New URI::file methods: + + new + new_abs + cwd + file + dir + + + +1998-09-09 Gisle Aas + + Release 0.07 + + Implemented rlogin, telnet and file URLs. + + Implemented URI::WithBase + + Implemented URI::URL emulator (ported old URI::URL test suite) + + Can now use schemes with "-", "+" or "." characters in them. + + $u->scheme will downcase. $u->_scheme will keep it as it is. + + Configuration variables for $u->abs + + $u->query_form and $u->query_keyword is more careful about escaping + "+" and "=". + + $u->host unescaped + + $u->_port if you want to bypass $u->default_port + + Can handle news message-ids with embedded "/" now + + + +1998-09-08 Gisle Aas + + Release 0.06 + + Implemented gopher URLs + + Implemented ftp URLs + + Second ctor argument can be a plain scheme name. If it is an + object, then we use the class of the object as implementor. + + Protect literal % in various places by escaping + + Path segments with parameters are not arrays of class URI::_segment, + which overloads the stringify operator. + + URI::http->canonical will now unescape unreserved characters. + + + +1998-09-08 Gisle Aas + + Release 0.05 + + Implemented news URLs (together with snews/nntp) + + Implemented pop URLs (RFC 2384) + + Can now use '==' to compare if two URI objects are the same or not. + + $u->opaque_part renamed as $u->opaque + + Better canonicalization + + Faster $u->abs (especially for URI that already are absolute) + + $u->query_form will keep more chars unescaped + + + +1998-09-06 Gisle Aas + + Release 0.04 + + Implemented mailto:-URLs (specified in RFC 2368) + + Moved query() methods to internal URI::_query mixin class. + + Escape stuff in the media_type field of data:-URLs. + + + +1998-09-06 Gisle Aas + + Release 0.03 based on simplified scalar object. + + + +1998-09-02 Gisle Aas + + Release 0.02 based on perl5.005 and fields.pm + + + +1998-04-10 Gisle Aas + + Release 0.01 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0faf80a --- /dev/null +++ b/LICENSE @@ -0,0 +1,379 @@ +This software is copyright (c) 1998 by Gisle Aas. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 1998 by Gisle Aas. + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 1998 by Gisle Aas. + +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +The End + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..a7ff67a --- /dev/null +++ b/MANIFEST @@ -0,0 +1,109 @@ +Changes +lib/URI.pm +lib/URI/_foreign.pm +lib/URI/_generic.pm +lib/URI/_idna.pm +lib/URI/_ldap.pm +lib/URI/_login.pm +lib/URI/_punycode.pm +lib/URI/_query.pm +lib/URI/_segment.pm +lib/URI/_server.pm +lib/URI/_userpass.pm +lib/URI/data.pm +lib/URI/Escape.pm +lib/URI/file.pm +lib/URI/file/Base.pm +lib/URI/file/FAT.pm +lib/URI/file/Mac.pm +lib/URI/file/OS2.pm +lib/URI/file/QNX.pm +lib/URI/file/Unix.pm +lib/URI/file/Win32.pm +lib/URI/ftp.pm +lib/URI/gopher.pm +lib/URI/Heuristic.pm +lib/URI/http.pm +lib/URI/https.pm +lib/URI/IRI.pm +lib/URI/ldap.pm +lib/URI/ldapi.pm +lib/URI/ldaps.pm +lib/URI/mailto.pm +lib/URI/mms.pm +lib/URI/news.pm +lib/URI/nntp.pm +lib/URI/pop.pm +lib/URI/QueryParam.pm +lib/URI/rlogin.pm +lib/URI/rsync.pm +lib/URI/rtsp.pm +lib/URI/rtspu.pm +lib/URI/sftp.pm +lib/URI/sip.pm +lib/URI/sips.pm +lib/URI/snews.pm +lib/URI/Split.pm +lib/URI/ssh.pm +lib/URI/telnet.pm +lib/URI/tn3270.pm +lib/URI/URL.pm +lib/URI/urn.pm +lib/URI/urn/isbn.pm +lib/URI/urn/oid.pm +lib/URI/WithBase.pm +LICENSE +Makefile.PL +MANIFEST +README +t/abs.t +t/clone.t +t/cwd.t +t/data.t +t/distmanifest.t +t/escape-char.t +t/escape.t +t/file.t +t/ftp.t +t/generic.t +t/gopher.t +t/heuristic.t +t/http.t +t/idna.t +t/iri.t +t/ldap.t +t/mailto.t +t/mix.t +t/mms.t +t/news.t +t/num_eq.t +t/old-absconf.t +t/old-base.t +t/old-file.t +t/old-relbase.t +t/path-segments.t +t/pop.t +t/punycode.t +t/query-param.t +t/query.t +t/rel.t +t/rfc2732.t +t/roy-test.t +t/roytest1.html +t/roytest2.html +t/roytest3.html +t/roytest4.html +t/roytest5.html +t/rsync.t +t/rtsp.t +t/sip.t +t/sort-hash-query-form.t +t/split.t +t/storable-test.pl +t/storable.t +t/urn-isbn.t +t/urn-oid.t +t/utf8.t +uri-test +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..6635cb6 --- /dev/null +++ b/META.json @@ -0,0 +1,91 @@ +{ + "abstract" : "Uniform Resource Identifiers (absolute and relative)", + "author" : [ + "Gisle Aas " + ], + "dynamic_config" : 0, + "generated_by" : "ExtUtils::MakeMaker version 7.0525, CPAN::Meta::Converter version 2.150005", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "URI", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : {} + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Exporter" : "5.57", + "MIME::Base64" : "2", + "Scalar::Util" : "0", + "parent" : "0", + "perl" : "5.008001", + "utf8" : "0" + } + }, + "test" : { + "requires" : { + "File::Temp" : "0", + "Test::More" : "0.96" + } + } + }, + "release_status" : "stable", + "resources" : { + "bugtracker" : { + "mailto" : "bug-URI@rt.cpan.org", + "web" : "https://rt.cpan.org/Public/Dist/Display.html?Name=URI" + }, + "repository" : { + "type" : "git", + "url" : "https://github.com/libwww-perl/uri.git", + "web" : "https://github.com/libwww-perl/uri" + }, + "x_IRC" : "irc://irc.perl.org/#lwp", + "x_MailingList" : "mailto:libwww@perl.org" + }, + "version" : "1.69", + "x_authority" : "cpan:GAAS", + "x_contributors" : [ + "Gisle Aas ", + "Karen Etheridge ", + "Ville Skyttä ", + "Mark Stosberg ", + "Michael G. Schwern ", + "Olaf Alders ", + "Slaven Rezic ", + "Matt Lawrence ", + "Peter Rabbitson ", + "Piotr Roszatycki ", + "Salvatore Bonaccorso ", + "Tatsuhiko Miyagawa ", + "Torsten Förtsch ", + "Adam Herzog ", + "gerard ", + "Alex Kapranoff ", + "Brendan Byrd ", + "David Schmidt ", + "Jan Dubois ", + "John Miller ", + "Kenichi Ishigaki ", + "Kent Fredric ", + "Masahiro Honma " + ], + "x_serialization_backend" : "JSON::PP version 2.27300" +} + diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..00d9229 --- /dev/null +++ b/META.yml @@ -0,0 +1,60 @@ +--- +abstract: 'Uniform Resource Identifiers (absolute and relative)' +author: + - 'Gisle Aas ' +build_requires: + File::Temp: '0' + Test::More: '0.96' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'ExtUtils::MakeMaker version 7.0525, CPAN::Meta::Converter version 2.150005' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: URI +no_index: + directory: + - t + - inc +requires: + Exporter: '5.57' + MIME::Base64: '2' + Scalar::Util: '0' + parent: '0' + perl: '5.008001' + utf8: '0' +resources: + IRC: irc://irc.perl.org/#lwp + MailingList: mailto:libwww@perl.org + bugtracker: https://rt.cpan.org/Public/Dist/Display.html?Name=URI + repository: https://github.com/libwww-perl/uri.git +version: '1.69' +x_authority: cpan:GAAS +x_contributors: + - 'Gisle Aas ' + - 'Karen Etheridge ' + - 'Ville Skyttä ' + - 'Mark Stosberg ' + - 'Michael G. Schwern ' + - 'Olaf Alders ' + - 'Slaven Rezic ' + - 'Matt Lawrence ' + - 'Peter Rabbitson ' + - 'Piotr Roszatycki ' + - 'Salvatore Bonaccorso ' + - 'Tatsuhiko Miyagawa ' + - 'Torsten Förtsch ' + - 'Adam Herzog ' + - 'gerard ' + - 'Alex Kapranoff ' + - 'Brendan Byrd ' + - 'David Schmidt ' + - 'Jan Dubois ' + - 'John Miller ' + - 'Kenichi Ishigaki ' + - 'Kent Fredric ' + - 'Masahiro Honma ' +x_serialization_backend: 'CPAN::Meta::YAML version 0.016' + diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..a8729d4 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,138 @@ +use strict; +use warnings; +require 5.008001; +use utf8; +use ExtUtils::MakeMaker; + +my $developer = -f '.gitignore'; +ExtUtils::MakeMaker->VERSION(6.98) if $developer; + +my %WriteMakefileArgs = ( + NAME => 'URI', + VERSION_FROM => 'lib/URI.pm', + ABSTRACT_FROM => 'lib/URI.pm', + AUTHOR => 'Gisle Aas ', + LICENSE => 'perl_5', + + META_MERGE => { + 'meta-spec' => { version => 2 }, + dynamic_config => 0, + resources => { + repository => { + url => 'https://github.com/libwww-perl/uri.git', + web => 'https://github.com/libwww-perl/uri', + type => 'git', + }, + bugtracker => { + mailto => 'bug-URI@rt.cpan.org', + web => 'https://rt.cpan.org/Public/Dist/Display.html?Name=URI', + }, + x_MailingList => 'mailto:libwww@perl.org', + x_IRC => 'irc://irc.perl.org/#lwp', + }, + x_authority => 'cpan:GAAS', + x_contributors => [ # manually added, from git shortlog -e -s -n + 'Gisle Aas ', + 'Karen Etheridge ', + 'Ville Skyttä ', + 'Mark Stosberg ', + 'Michael G. Schwern ', + 'Olaf Alders ', + 'Slaven Rezic ', + 'Matt Lawrence ', + 'Peter Rabbitson ', + 'Piotr Roszatycki ', + 'Salvatore Bonaccorso ', + 'Tatsuhiko Miyagawa ', + 'Torsten Förtsch ', + 'Adam Herzog ', + 'gerard ', + 'Alex Kapranoff ', + 'Brendan Byrd ', + 'David Schmidt ', + 'Jan Dubois ', + 'John Miller ', + 'Kenichi Ishigaki ', + 'Kent Fredric ', + 'Masahiro Honma ', + ], + }, + + META_ADD => { + prereqs => { + configure => { + requires => { + 'ExtUtils::MakeMaker' => '0', + }, + }, + runtime => { + requires => { + 'MIME::Base64' => '2', + 'parent' => '0', + 'Exporter' => '5.57', + 'utf8' => '0', + 'Scalar::Util' => '0', + 'perl' => '5.008001', + }, + recommends => { + 'Business::ISBN' => '0', + }, + }, + test => { + requires => { + 'Test::More' => '0.96', + 'File::Temp' => '0', + }, + }, + }, + }, +); + +my $eumm_version = eval $ExtUtils::MakeMaker::VERSION; + +for (qw(configure build test runtime)) { + my $key = $_ eq 'runtime' ? 'PREREQ_PM' : uc $_.'_REQUIRES'; + next unless exists $WriteMakefileArgs{META_ADD}{prereqs}{$_} + or exists $WriteMakefileArgs{$key}; + my $r = $WriteMakefileArgs{$key} = { + %{$WriteMakefileArgs{META_ADD}{prereqs}{$_}{requires} || {}}, + %{delete $WriteMakefileArgs{$key} || {}}, + }; + defined $r->{$_} or delete $r->{$_} for keys %$r; +} + +# dynamic prereqs get added here. + +$WriteMakefileArgs{MIN_PERL_VERSION} = delete $WriteMakefileArgs{PREREQ_PM}{perl} || 0; + +die 'attention developer: you need to do a sane meta merge here!' + if keys %{$WriteMakefileArgs{BUILD_REQUIRES}}; + +$WriteMakefileArgs{BUILD_REQUIRES} = { + %{$WriteMakefileArgs{BUILD_REQUIRES} || {}}, + %{delete $WriteMakefileArgs{TEST_REQUIRES}} +} if $eumm_version < 6.63_03; + +$WriteMakefileArgs{PREREQ_PM} = { + %{$WriteMakefileArgs{PREREQ_PM}}, + %{delete $WriteMakefileArgs{BUILD_REQUIRES}} +} if $eumm_version < 6.55_01; + +delete $WriteMakefileArgs{CONFIGURE_REQUIRES} + if $eumm_version < 6.51_03; + +delete $WriteMakefileArgs{MIN_PERL_VERSION} + if $eumm_version < 6.48; + +delete @WriteMakefileArgs{qw(META_ADD META_MERGE)} + if $eumm_version < 6.46; + +delete $WriteMakefileArgs{LICENSE} + if $eumm_version < 6.31; + +WriteMakefile(%WriteMakefileArgs); + +# pod2text is in https://metacpan.org/release/podlators +system("pod2text $WriteMakefileArgs{VERSION_FROM} > README") + if $developer + and (not -e 'README' or (stat('README'))[9] < (stat($WriteMakefileArgs{VERSION_FROM}))[9]); diff --git a/README b/README new file mode 100644 index 0000000..09a8023 --- /dev/null +++ b/README @@ -0,0 +1,667 @@ +NAME + URI - Uniform Resource Identifiers (absolute and relative) + +SYNOPSIS + $u1 = URI->new("http://www.perl.com"); + $u2 = URI->new("foo", "http"); + $u3 = $u2->abs($u1); + $u4 = $u3->clone; + $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical; + + $str = $u->as_string; + $str = "$u"; + + $scheme = $u->scheme; + $opaque = $u->opaque; + $path = $u->path; + $frag = $u->fragment; + + $u->scheme("ftp"); + $u->host("ftp.perl.com"); + $u->path("cpan/"); + +DESCRIPTION + This module implements the "URI" class. Objects of this class represent + "Uniform Resource Identifier references" as specified in RFC 2396 (and + updated by RFC 2732). + + A Uniform Resource Identifier is a compact string of characters that + identifies an abstract or physical resource. A Uniform Resource + Identifier can be further classified as either a Uniform Resource + Locator (URL) or a Uniform Resource Name (URN). The distinction between + URL and URN does not matter to the "URI" class interface. A + "URI-reference" is a URI that may have additional information attached + in the form of a fragment identifier. + + An absolute URI reference consists of three parts: a *scheme*, a + *scheme-specific part* and a *fragment* identifier. A subset of URI + references share a common syntax for hierarchical namespaces. For these, + the scheme-specific part is further broken down into *authority*, *path* + and *query* components. These URIs can also take the form of relative + URI references, where the scheme (and usually also the authority) + component is missing, but implied by the context of the URI reference. + The three forms of URI reference syntax are summarized as follows: + + :# + ://?# + ?# + + The components into which a URI reference can be divided depend on the + *scheme*. The "URI" class provides methods to get and set the individual + components. The methods available for a specific "URI" object depend on + the scheme. + +CONSTRUCTORS + The following methods construct new "URI" objects: + + $uri = URI->new( $str ) + $uri = URI->new( $str, $scheme ) + Constructs a new URI object. The string representation of a URI is + given as argument, together with an optional scheme specification. + Common URI wrappers like "" and <>, as well as leading and trailing + white space, are automatically removed from the $str argument before + it is processed further. + + The constructor determines the scheme, maps this to an appropriate + URI subclass, constructs a new object of that class and returns it. + + If the scheme isn't one of those that URI recognizes, you still get + an URI object back that you can access the generic methods on. The + "$uri->has_recognized_scheme" method can be used to test for this. + + The $scheme argument is only used when $str is a relative URI. It + can be either a simple string that denotes the scheme, a string + containing an absolute URI reference, or an absolute "URI" object. + If no $scheme is specified for a relative URI $str, then $str is + simply treated as a generic URI (no scheme-specific methods + available). + + The set of characters available for building URI references is + restricted (see URI::Escape). Characters outside this set are + automatically escaped by the URI constructor. + + $uri = URI->new_abs( $str, $base_uri ) + Constructs a new absolute URI object. The $str argument can denote a + relative or absolute URI. If relative, then it is absolutized using + $base_uri as base. The $base_uri must be an absolute URI. + + $uri = URI::file->new( $filename ) + $uri = URI::file->new( $filename, $os ) + Constructs a new *file* URI from a file name. See URI::file. + + $uri = URI::file->new_abs( $filename ) + $uri = URI::file->new_abs( $filename, $os ) + Constructs a new absolute *file* URI from a file name. See + URI::file. + + $uri = URI::file->cwd + Returns the current working directory as a *file* URI. See + URI::file. + + $uri->clone + Returns a copy of the $uri. + +COMMON METHODS + The methods described in this section are available for all "URI" + objects. + + Methods that give access to components of a URI always return the old + value of the component. The value returned is "undef" if the component + was not present. There is generally a difference between a component + that is empty (represented as "") and a component that is missing + (represented as "undef"). If an accessor method is given an argument, it + updates the corresponding component in addition to returning the old + value of the component. Passing an undefined argument removes the + component (if possible). The description of each accessor method + indicates whether the component is passed as an escaped + (percent-encoded) or an unescaped string. A component that can be + further divided into sub-parts are usually passed escaped, as unescaping + might change its semantics. + + The common methods available for all URI are: + + $uri->scheme + $uri->scheme( $new_scheme ) + Sets and returns the scheme part of the $uri. If the $uri is + relative, then $uri->scheme returns "undef". If called with an + argument, it updates the scheme of $uri, possibly changing the class + of $uri, and returns the old scheme value. The method croaks if the + new scheme name is illegal; a scheme name must begin with a letter + and must consist of only US-ASCII letters, numbers, and a few + special marks: ".", "+", "-". This restriction effectively means + that the scheme must be passed unescaped. Passing an undefined + argument to the scheme method makes the URI relative (if possible). + + Letter case does not matter for scheme names. The string returned by + $uri->scheme is always lowercase. If you want the scheme just as it + was written in the URI in its original case, you can use the + $uri->_scheme method instead. + + $uri->has_recognized_scheme + Returns TRUE if the URI scheme is one that URI recognizes. + + It will also be TRUE for relative URLs where a recognized scheme was + provided to the constructor, even if "$uri->scheme" returns "undef" + for these. + + $uri->opaque + $uri->opaque( $new_opaque ) + Sets and returns the scheme-specific part of the $uri (everything + between the scheme and the fragment) as an escaped string. + + $uri->path + $uri->path( $new_path ) + Sets and returns the same value as $uri->opaque unless the URI + supports the generic syntax for hierarchical namespaces. In that + case the generic method is overridden to set and return the part of + the URI between the *host name* and the *fragment*. + + $uri->fragment + $uri->fragment( $new_frag ) + Returns the fragment identifier of a URI reference as an escaped + string. + + $uri->as_string + Returns a URI object to a plain ASCII string. URI objects are also + converted to plain strings automatically by overloading. This means + that $uri objects can be used as plain strings in most Perl + constructs. + + $uri->as_iri + Returns a Unicode string representing the URI. Escaped UTF-8 + sequences representing non-ASCII characters are turned into their + corresponding Unicode code point. + + $uri->canonical + Returns a normalized version of the URI. The rules for normalization + are scheme-dependent. They usually involve lowercasing the scheme + and Internet host name components, removing the explicit port + specification if it matches the default port, uppercasing all escape + sequences, and unescaping octets that can be better represented as + plain characters. + + For efficiency reasons, if the $uri is already in normalized form, + then a reference to it is returned instead of a copy. + + $uri->eq( $other_uri ) + URI::eq( $first_uri, $other_uri ) + Tests whether two URI references are equal. URI references that + normalize to the same string are considered equal. The method can + also be used as a plain function which can also test two string + arguments. + + If you need to test whether two "URI" object references denote the + same object, use the '==' operator. + + $uri->abs( $base_uri ) + Returns an absolute URI reference. If $uri is already absolute, then + a reference to it is simply returned. If the $uri is relative, then + a new absolute URI is constructed by combining the $uri and the + $base_uri, and returned. + + $uri->rel( $base_uri ) + Returns a relative URI reference if it is possible to make one that + denotes the same resource relative to $base_uri. If not, then $uri + is simply returned. + + $uri->secure + Returns a TRUE value if the URI is considered to point to a resource + on a secure channel, such as an SSL or TLS encrypted one. + +GENERIC METHODS + The following methods are available to schemes that use the + common/generic syntax for hierarchical namespaces. The descriptions of + schemes below indicate which these are. Unrecognized schemes are assumed + to support the generic syntax, and therefore the following methods: + + $uri->authority + $uri->authority( $new_authority ) + Sets and returns the escaped authority component of the $uri. + + $uri->path + $uri->path( $new_path ) + Sets and returns the escaped path component of the $uri (the part + between the host name and the query or fragment). The path can never + be undefined, but it can be the empty string. + + $uri->path_query + $uri->path_query( $new_path_query ) + Sets and returns the escaped path and query components as a single + entity. The path and the query are separated by a "?" character, but + the query can itself contain "?". + + $uri->path_segments + $uri->path_segments( $segment, ... ) + Sets and returns the path. In a scalar context, it returns the same + value as $uri->path. In a list context, it returns the unescaped + path segments that make up the path. Path segments that have + parameters are returned as an anonymous array. The first element is + the unescaped path segment proper; subsequent elements are escaped + parameter strings. Such an anonymous array uses overloading so it + can be treated as a string too, but this string does not include the + parameters. + + Note that absolute paths have the empty string as their first + *path_segment*, i.e. the *path* "/foo/bar" have 3 *path_segments*; + "", "foo" and "bar". + + $uri->query + $uri->query( $new_query ) + Sets and returns the escaped query component of the $uri. + + $uri->query_form + $uri->query_form( $key1 => $val1, $key2 => $val2, ... ) + $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim ) + $uri->query_form( \@key_value_pairs ) + $uri->query_form( \@key_value_pairs, $delim ) + $uri->query_form( \%hash ) + $uri->query_form( \%hash, $delim ) + Sets and returns query components that use the + *application/x-www-form-urlencoded* format. Key/value pairs are + separated by "&", and the key is separated from the value by a "=" + character. + + The form can be set either by passing separate key/value pairs, or + via an array or hash reference. Passing an empty array or an empty + hash removes the query component, whereas passing no arguments at + all leaves the component unchanged. The order of keys is undefined + if a hash reference is passed. The old value is always returned as a + list of separate key/value pairs. Assigning this list to a hash is + unwise as the keys returned might repeat. + + The values passed when setting the form can be plain strings or + references to arrays of strings. Passing an array of values has the + same effect as passing the key repeatedly with one value at a time. + All the following statements have the same effect: + + $uri->query_form(foo => 1, foo => 2); + $uri->query_form(foo => [1, 2]); + $uri->query_form([ foo => 1, foo => 2 ]); + $uri->query_form([ foo => [1, 2] ]); + $uri->query_form({ foo => [1, 2] }); + + The $delim parameter can be passed as ";" to force the key/value + pairs to be delimited by ";" instead of "&" in the query string. + This practice is often recommended for URLs embedded in HTML or XML + documents as this avoids the trouble of escaping the "&" character. + You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable + to ";" for the same global effect. + + The "URI::QueryParam" module can be loaded to add further methods to + manipulate the form of a URI. See URI::QueryParam for details. + + $uri->query_keywords + $uri->query_keywords( $keywords, ... ) + $uri->query_keywords( \@keywords ) + Sets and returns query components that use the keywords separated by + "+" format. + + The keywords can be set either by passing separate keywords directly + or by passing a reference to an array of keywords. Passing an empty + array removes the query component, whereas passing no arguments at + all leaves the component unchanged. The old value is always returned + as a list of separate words. + +SERVER METHODS + For schemes where the *authority* component denotes an Internet host, + the following methods are available in addition to the generic methods. + + $uri->userinfo + $uri->userinfo( $new_userinfo ) + Sets and returns the escaped userinfo part of the authority + component. + + For some schemes this is a user name and a password separated by a + colon. This practice is not recommended. Embedding passwords in + clear text (such as URI) has proven to be a security risk in almost + every case where it has been used. + + $uri->host + $uri->host( $new_host ) + Sets and returns the unescaped hostname. + + If the $new_host string ends with a colon and a number, then this + number also sets the port. + + For IPv6 addresses the brackets around the raw address is removed in + the return value from $uri->host. When setting the host attribute to + an IPv6 address you can use a raw address or one enclosed in + brackets. The address needs to be enclosed in brackets if you want + to pass in a new port value as well. + + $uri->ihost + Returns the host in Unicode form. Any IDNA A-labels are turned into + U-labels. + + $uri->port + $uri->port( $new_port ) + Sets and returns the port. The port is a simple integer that should + be greater than 0. + + If a port is not specified explicitly in the URI, then the URI + scheme's default port is returned. If you don't want the default + port substituted, then you can use the $uri->_port method instead. + + $uri->host_port + $uri->host_port( $new_host_port ) + Sets and returns the host and port as a single unit. The returned + value includes a port, even if it matches the default port. The host + part and the port part are separated by a colon: ":". + + For IPv6 addresses the bracketing is preserved; thus + URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast + this with $uri->host which will remove the brackets. + + $uri->default_port + Returns the default port of the URI scheme to which $uri belongs. + For *http* this is the number 80, for *ftp* this is the number 21, + etc. The default port for a scheme can not be changed. + +SCHEME-SPECIFIC SUPPORT + Scheme-specific support is provided for the following URI schemes. For + "URI" objects that do not belong to one of these, you can only use the + common and generic methods. + + data: + The *data* URI scheme is specified in RFC 2397. It allows inclusion + of small data items as "immediate" data, as if it had been included + externally. + + "URI" objects belonging to the data scheme support the common + methods and two new methods to access their scheme-specific + components: $uri->media_type and $uri->data. See URI::data for + details. + + file: + An old specification of the *file* URI scheme is found in RFC 1738. + A new RFC 2396 based specification in not available yet, but file + URI references are in common use. + + "URI" objects belonging to the file scheme support the common and + generic methods. In addition, they provide two methods for mapping + file URIs back to local file names; $uri->file and $uri->dir. See + URI::file for details. + + ftp: + An old specification of the *ftp* URI scheme is found in RFC 1738. A + new RFC 2396 based specification in not available yet, but ftp URI + references are in common use. + + "URI" objects belonging to the ftp scheme support the common, + generic and server methods. In addition, they provide two methods + for accessing the userinfo sub-components: $uri->user and + $uri->password. + + gopher: + The *gopher* URI scheme is specified in + and will hopefully be available + as a RFC 2396 based specification. + + "URI" objects belonging to the gopher scheme support the common, + generic and server methods. In addition, they support some methods + for accessing gopher-specific path components: $uri->gopher_type, + $uri->selector, $uri->search, $uri->string. + + http: + The *http* URI scheme is specified in RFC 2616. The scheme is used + to reference resources hosted by HTTP servers. + + "URI" objects belonging to the http scheme support the common, + generic and server methods. + + https: + The *https* URI scheme is a Netscape invention which is commonly + implemented. The scheme is used to reference HTTP servers through + SSL connections. Its syntax is the same as http, but the default + port is different. + + ldap: + The *ldap* URI scheme is specified in RFC 2255. LDAP is the + Lightweight Directory Access Protocol. An ldap URI describes an LDAP + search operation to perform to retrieve information from an LDAP + directory. + + "URI" objects belonging to the ldap scheme support the common, + generic and server methods as well as ldap-specific methods: + $uri->dn, $uri->attributes, $uri->scope, $uri->filter, + $uri->extensions. See URI::ldap for details. + + ldapi: + Like the *ldap* URI scheme, but uses a UNIX domain socket. The + server methods are not supported, and the local socket path is + available as $uri->un_path. The *ldapi* scheme is used by the + OpenLDAP package. There is no real specification for it, but it is + mentioned in various OpenLDAP manual pages. + + ldaps: + Like the *ldap* URI scheme, but uses an SSL connection. This scheme + is deprecated, as the preferred way is to use the *start_tls* + mechanism. + + mailto: + The *mailto* URI scheme is specified in RFC 2368. The scheme was + originally used to designate the Internet mailing address of an + individual or service. It has (in RFC 2368) been extended to allow + setting of other mail header fields and the message body. + + "URI" objects belonging to the mailto scheme support the common + methods and the generic query methods. In addition, they support the + following mailto-specific methods: $uri->to, $uri->headers. + + Note that the "foo@example.com" part of a mailto is *not* the + "userinfo" and "host" but instead the "path". This allows a mailto + URI to contain multiple comma separated email addresses. + + mms: + The *mms* URL specification can be found at . + "URI" objects belonging to the mms scheme support the common, + generic, and server methods, with the exception of userinfo and + query-related sub-components. + + news: + The *news*, *nntp* and *snews* URI schemes are specified in + and will hopefully be available as an RFC + 2396 based specification soon. + + "URI" objects belonging to the news scheme support the common, + generic and server methods. In addition, they provide some methods + to access the path: $uri->group and $uri->message. + + nntp: + See *news* scheme. + + pop: + The *pop* URI scheme is specified in RFC 2384. The scheme is used to + reference a POP3 mailbox. + + "URI" objects belonging to the pop scheme support the common, + generic and server methods. In addition, they provide two methods to + access the userinfo components: $uri->user and $uri->auth + + rlogin: + An old specification of the *rlogin* URI scheme is found in RFC + 1738. "URI" objects belonging to the rlogin scheme support the + common, generic and server methods. + + rtsp: + The *rtsp* URL specification can be found in section 3.2 of RFC + 2326. "URI" objects belonging to the rtsp scheme support the common, + generic, and server methods, with the exception of userinfo and + query-related sub-components. + + rtspu: + The *rtspu* URI scheme is used to talk to RTSP servers over UDP + instead of TCP. The syntax is the same as rtsp. + + rsync: + Information about rsync is available from . + "URI" objects belonging to the rsync scheme support the common, + generic and server methods. In addition, they provide methods to + access the userinfo sub-components: $uri->user and $uri->password. + + sip: + The *sip* URI specification is described in sections 19.1 and 25 of + RFC 3261. "URI" objects belonging to the sip scheme support the + common, generic, and server methods with the exception of path + related sub-components. In addition, they provide two methods to get + and set *sip* parameters: $uri->params_form and $uri->params. + + sips: + See *sip* scheme. Its syntax is the same as sip, but the default + port is different. + + snews: + See *news* scheme. Its syntax is the same as news, but the default + port is different. + + telnet: + An old specification of the *telnet* URI scheme is found in RFC + 1738. "URI" objects belonging to the telnet scheme support the + common, generic and server methods. + + tn3270: + These URIs are used like *telnet* URIs but for connections to IBM + mainframes. "URI" objects belonging to the tn3270 scheme support the + common, generic and server methods. + + ssh: + Information about ssh is available at . + "URI" objects belonging to the ssh scheme support the common, + generic and server methods. In addition, they provide methods to + access the userinfo sub-components: $uri->user and $uri->password. + + sftp: + "URI" objects belonging to the sftp scheme support the common, + generic and server methods. In addition, they provide methods to + access the userinfo sub-components: $uri->user and $uri->password. + + urn: + The syntax of Uniform Resource Names is specified in RFC 2141. "URI" + objects belonging to the urn scheme provide the common methods, and + also the methods $uri->nid and $uri->nss, which return the Namespace + Identifier and the Namespace-Specific String respectively. + + The Namespace Identifier basically works like the Scheme identifier + of URIs, and further divides the URN namespace. Namespace Identifier + assignments are maintained at + . + + Letter case is not significant for the Namespace Identifier. It is + always returned in lower case by the $uri->nid method. The + $uri->_nid method can be used if you want it in its original case. + + urn:isbn: + The "urn:isbn:" namespace contains International Standard Book + Numbers (ISBNs) and is described in RFC 3187. A "URI" object + belonging to this namespace has the following extra methods (if the + Business::ISBN module is available): $uri->isbn, + $uri->isbn_publisher_code, $uri->isbn_group_code (formerly + isbn_country_code, which is still supported by issues a deprecation + warning), $uri->isbn_as_ean. + + urn:oid: + The "urn:oid:" namespace contains Object Identifiers (OIDs) and is + described in RFC 3061. An object identifier consists of sequences of + digits separated by dots. A "URI" object belonging to this namespace + has an additional method called $uri->oid that can be used to + get/set the oid value. In a list context, oid numbers are returned + as separate elements. + +CONFIGURATION VARIABLES + The following configuration variables influence how the class and its + methods behave: + + $URI::ABS_ALLOW_RELATIVE_SCHEME + Some older parsers used to allow the scheme name to be present in + the relative URL if it was the same as the base URL scheme. RFC 2396 + says that this should be avoided, but you can enable this old + behaviour by setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to + a TRUE value. The difference is demonstrated by the following + examples: + + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:foo" + + local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:/host/a/foo" + + $URI::ABS_REMOTE_LEADING_DOTS + You can also have the abs() method ignore excess ".." segments in + the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS to a TRUE + value. The difference is demonstrated by the following examples: + + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/../../foo" + + local $URI::ABS_REMOTE_LEADING_DOTS = 1; + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/foo" + + $URI::DEFAULT_QUERY_FORM_DELIMITER + This value can be set to ";" to have the query form "key=value" + pairs delimited by ";" instead of "&" which is the default. + +BUGS + There are some things that are not quite right: + + * Using regexp variables like $1 directly as arguments to the URI + accessor methods does not work too well with current perl + implementations. I would argue that this is actually a bug in perl. + The workaround is to quote them. Example: + + /(...)/ || die; + $u->query("$1"); + + * The escaping (percent encoding) of chars in the 128 .. 255 range + passed to the URI constructor or when setting URI parts using the + accessor methods depend on the state of the internal UTF8 flag (see + utf8::is_utf8) of the string passed. If the UTF8 flag is set the + UTF-8 encoded version of the character is percent encoded. If the + UTF8 flag isn't set the Latin-1 version (byte) of the character is + percent encoded. This basically exposes the internal encoding of + Perl strings. + +PARSING URIs WITH REGEXP + As an alternative to this module, the following (official) regular + expression can be used to decode a URI: + + my($scheme, $authority, $path, $query, $fragment) = + $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + + The "URI::Split" module provides the function uri_split() as a readable + alternative. + +SEE ALSO + URI::file, URI::WithBase, URI::QueryParam, URI::Escape, URI::Split, + URI::Heuristic + + RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax", + Berners-Lee, Fielding, Masinter, August 1998. + + + + + + + +COPYRIGHT + Copyright 1995-2009 Gisle Aas. + + Copyright 1995 Martijn Koster. + + This program is free software; you can redistribute it and/or modify it + under the same terms as Perl itself. + +AUTHORS / ACKNOWLEDGMENTS + This module is based on the "URI::URL" module, which in turn was + (distantly) based on the "wwwurl.pl" code in the libwww-perl for perl4 + developed by Roy Fielding, as part of the Arcadia project at the + University of California, Irvine, with contributions from Brooks Cutter. + + "URI::URL" was developed by Gisle Aas, Tim Bunce, Roy Fielding and + Martijn Koster with input from other people on the libwww-perl mailing + list. + + "URI" and related subclasses was developed by Gisle Aas. + diff --git a/lib/URI.pm b/lib/URI.pm new file mode 100644 index 0000000..98cd575 --- /dev/null +++ b/lib/URI.pm @@ -0,0 +1,1155 @@ +package URI; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +our ($ABS_REMOTE_LEADING_DOTS, $ABS_ALLOW_RELATIVE_SCHEME, $DEFAULT_QUERY_FORM_DELIMITER); + +my %implements; # mapping from scheme to implementor class + +# Some "official" character classes + +our $reserved = q(;/?:@&=+$,[]); +our $mark = q(-_.!~*'()); #'; emacs +our $unreserved = "A-Za-z0-9\Q$mark\E"; +our $uric = quotemeta($reserved) . $unreserved . "%"; + +our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*'; + +use Carp (); +use URI::Escape (); + +use overload ('""' => sub { ${$_[0]} }, + '==' => sub { _obj_eq(@_) }, + '!=' => sub { !_obj_eq(@_) }, + fallback => 1, + ); + +# Check if two objects are the same object +sub _obj_eq { + return overload::StrVal($_[0]) eq overload::StrVal($_[1]); +} + +sub new +{ + my($class, $uri, $scheme) = @_; + + $uri = defined ($uri) ? "$uri" : ""; # stringify + # Get rid of potential wrapping + $uri =~ s/^<(?:URL:)?(.*)>$/$1/; # + $uri =~ s/^"(.*)"$/$1/; + $uri =~ s/^\s+//; + $uri =~ s/\s+$//; + + my $impclass; + if ($uri =~ m/^($scheme_re):/so) { + $scheme = $1; + } + else { + if (($impclass = ref($scheme))) { + $scheme = $scheme->scheme; + } + elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) { + $scheme = $1; + } + } + $impclass ||= implementor($scheme) || + do { + require URI::_foreign; + $impclass = 'URI::_foreign'; + }; + + return $impclass->_init($uri, $scheme); +} + + +sub new_abs +{ + my($class, $uri, $base) = @_; + $uri = $class->new($uri, $base); + $uri->abs($base); +} + + +sub _init +{ + my $class = shift; + my($str, $scheme) = @_; + # find all funny characters and encode the bytes. + $str = $class->_uric_escape($str); + $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o || + $class->_no_scheme_ok; + my $self = bless \$str, $class; + $self; +} + + +sub _uric_escape +{ + my($class, $str) = @_; + $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego; + utf8::downgrade($str); + return $str; +} + + +sub implementor +{ + my($scheme, $impclass) = @_; + if (!$scheme || $scheme !~ /\A$scheme_re\z/o) { + require URI::_generic; + return "URI::_generic"; + } + + $scheme = lc($scheme); + + if ($impclass) { + # Set the implementor class for a given scheme + my $old = $implements{$scheme}; + $impclass->_init_implementor($scheme); + $implements{$scheme} = $impclass; + return $old; + } + + my $ic = $implements{$scheme}; + return $ic if $ic; + + # scheme not yet known, look for internal or + # preloaded (with 'use') implementation + $ic = "URI::$scheme"; # default location + + # turn scheme into a valid perl identifier by a simple transformation... + $ic =~ s/\+/_P/g; + $ic =~ s/\./_O/g; + $ic =~ s/\-/_/g; + + no strict 'refs'; + # check we actually have one for the scheme: + unless (@{"${ic}::ISA"}) { + # Try to load it + eval "require $ic"; + die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/; + return undef unless @{"${ic}::ISA"}; + } + + $ic->_init_implementor($scheme); + $implements{$scheme} = $ic; + $ic; +} + + +sub _init_implementor +{ + my($class, $scheme) = @_; + # Remember that one implementor class may actually + # serve to implement several URI schemes. +} + + +sub clone +{ + my $self = shift; + my $other = $$self; + bless \$other, ref $self; +} + +sub TO_JSON { ${$_[0]} } + +sub _no_scheme_ok { 0 } + +sub _scheme +{ + my $self = shift; + + unless (@_) { + return undef unless $$self =~ /^($scheme_re):/o; + return $1; + } + + my $old; + my $new = shift; + if (defined($new) && length($new)) { + Carp::croak("Bad scheme '$new'") unless $new =~ /^$scheme_re$/o; + $old = $1 if $$self =~ s/^($scheme_re)://o; + my $newself = URI->new("$new:$$self"); + $$self = $$newself; + bless $self, ref($newself); + } + else { + if ($self->_no_scheme_ok) { + $old = $1 if $$self =~ s/^($scheme_re)://o; + Carp::carp("Oops, opaque part now look like scheme") + if $^W && $$self =~ m/^$scheme_re:/o + } + else { + $old = $1 if $$self =~ m/^($scheme_re):/o; + } + } + + return $old; +} + +sub scheme +{ + my $scheme = shift->_scheme(@_); + return undef unless defined $scheme; + lc($scheme); +} + +sub has_recognized_scheme { + my $self = shift; + return ref($self) !~ /^URI::_(?:foreign|generic)\z/; +} + +sub opaque +{ + my $self = shift; + + unless (@_) { + $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die; + return $1; + } + + $$self =~ /^($scheme_re:)? # optional scheme + ([^\#]*) # opaque + (\#.*)? # optional fragment + $/sx or die; + + my $old_scheme = $1; + my $old_opaque = $2; + my $old_frag = $3; + + my $new_opaque = shift; + $new_opaque = "" unless defined $new_opaque; + $new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_opaque); + + $$self = defined($old_scheme) ? $old_scheme : ""; + $$self .= $new_opaque; + $$self .= $old_frag if defined $old_frag; + + $old_opaque; +} + +sub path { goto &opaque } # alias + + +sub fragment +{ + my $self = shift; + unless (@_) { + return undef unless $$self =~ /\#(.*)/s; + return $1; + } + + my $old; + $old = $1 if $$self =~ s/\#(.*)//s; + + my $new_frag = shift; + if (defined $new_frag) { + $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego; + utf8::downgrade($new_frag); + $$self .= "#$new_frag"; + } + $old; +} + + +sub as_string +{ + my $self = shift; + $$self; +} + + +sub as_iri +{ + my $self = shift; + my $str = $$self; + if ($str =~ s/%([89a-fA-F][0-9a-fA-F])/chr(hex($1))/eg) { + # All this crap because the more obvious: + # + # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift }) + # + # doesn't work before Encode 2.39. Wait for a standard release + # to bundle that version. + + require Encode; + my $enc = Encode::find_encoding("UTF-8"); + my $u = ""; + while (length $str) { + $u .= $enc->decode($str, Encode::FB_QUIET()); + if (length $str) { + # escape next char + $u .= URI::Escape::escape_char(substr($str, 0, 1, "")); + } + } + $str = $u; + } + return $str; +} + + +sub canonical +{ + # Make sure scheme is lowercased, that we don't escape unreserved chars, + # and that we use upcase escape sequences. + + my $self = shift; + my $scheme = $self->_scheme || ""; + my $uc_scheme = $scheme =~ /[A-Z]/; + my $esc = $$self =~ /%[a-fA-F0-9]{2}/; + return $self unless $uc_scheme || $esc; + + my $other = $self->clone; + if ($uc_scheme) { + $other->_scheme(lc $scheme); + } + if ($esc) { + $$other =~ s{%([0-9a-fA-F]{2})} + { my $a = chr(hex($1)); + $a =~ /^[$unreserved]\z/o ? $a : "%\U$1" + }ge; + } + return $other; +} + +# Compare two URIs, subclasses will provide a more correct implementation +sub eq { + my($self, $other) = @_; + $self = URI->new($self, $other) unless ref $self; + $other = URI->new($other, $self) unless ref $other; + ref($self) eq ref($other) && # same class + $self->canonical->as_string eq $other->canonical->as_string; +} + +# generic-URI transformation methods +sub abs { $_[0]; } +sub rel { $_[0]; } + +sub secure { 0 } + +# help out Storable +sub STORABLE_freeze { + my($self, $cloning) = @_; + return $$self; +} + +sub STORABLE_thaw { + my($self, $cloning, $str) = @_; + $$self = $str; +} + +1; + +__END__ + +=head1 NAME + +URI - Uniform Resource Identifiers (absolute and relative) + +=head1 SYNOPSIS + + $u1 = URI->new("http://www.perl.com"); + $u2 = URI->new("foo", "http"); + $u3 = $u2->abs($u1); + $u4 = $u3->clone; + $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical; + + $str = $u->as_string; + $str = "$u"; + + $scheme = $u->scheme; + $opaque = $u->opaque; + $path = $u->path; + $frag = $u->fragment; + + $u->scheme("ftp"); + $u->host("ftp.perl.com"); + $u->path("cpan/"); + +=head1 DESCRIPTION + +This module implements the C class. Objects of this class +represent "Uniform Resource Identifier references" as specified in RFC +2396 (and updated by RFC 2732). + +A Uniform Resource Identifier is a compact string of characters that +identifies an abstract or physical resource. A Uniform Resource +Identifier can be further classified as either a Uniform Resource Locator +(URL) or a Uniform Resource Name (URN). The distinction between URL +and URN does not matter to the C class interface. A +"URI-reference" is a URI that may have additional information attached +in the form of a fragment identifier. + +An absolute URI reference consists of three parts: a I, a +I and a I identifier. A subset of URI +references share a common syntax for hierarchical namespaces. For +these, the scheme-specific part is further broken down into +I, I and I components. These URIs can also +take the form of relative URI references, where the scheme (and +usually also the authority) component is missing, but implied by the +context of the URI reference. The three forms of URI reference +syntax are summarized as follows: + + :# + ://?# + ?# + +The components into which a URI reference can be divided depend on the +I. The C class provides methods to get and set the +individual components. The methods available for a specific +C object depend on the scheme. + +=head1 CONSTRUCTORS + +The following methods construct new C objects: + +=over 4 + +=item $uri = URI->new( $str ) + +=item $uri = URI->new( $str, $scheme ) + +Constructs a new URI object. The string +representation of a URI is given as argument, together with an optional +scheme specification. Common URI wrappers like "" and <>, as well as +leading and trailing white space, are automatically removed from +the $str argument before it is processed further. + +The constructor determines the scheme, maps this to an appropriate +URI subclass, constructs a new object of that class and returns it. + +If the scheme isn't one of those that URI recognizes, you still get +an URI object back that you can access the generic methods on. The +C<< $uri->has_recognized_scheme >> method can be used to test for +this. + +The $scheme argument is only used when $str is a +relative URI. It can be either a simple string that +denotes the scheme, a string containing an absolute URI reference, or +an absolute C object. If no $scheme is specified for a relative +URI $str, then $str is simply treated as a generic URI (no scheme-specific +methods available). + +The set of characters available for building URI references is +restricted (see L). Characters outside this set are +automatically escaped by the URI constructor. + +=item $uri = URI->new_abs( $str, $base_uri ) + +Constructs a new absolute URI object. The $str argument can +denote a relative or absolute URI. If relative, then it is +absolutized using $base_uri as base. The $base_uri must be an absolute +URI. + +=item $uri = URI::file->new( $filename ) + +=item $uri = URI::file->new( $filename, $os ) + +Constructs a new I URI from a file name. See L. + +=item $uri = URI::file->new_abs( $filename ) + +=item $uri = URI::file->new_abs( $filename, $os ) + +Constructs a new absolute I URI from a file name. See +L. + +=item $uri = URI::file->cwd + +Returns the current working directory as a I URI. See +L. + +=item $uri->clone + +Returns a copy of the $uri. + +=back + +=head1 COMMON METHODS + +The methods described in this section are available for all C +objects. + +Methods that give access to components of a URI always return the +old value of the component. The value returned is C if the +component was not present. There is generally a difference between a +component that is empty (represented as C<"">) and a component that is +missing (represented as C). If an accessor method is given an +argument, it updates the corresponding component in addition to +returning the old value of the component. Passing an undefined +argument removes the component (if possible). The description of +each accessor method indicates whether the component is passed as +an escaped (percent-encoded) or an unescaped string. A component that can be further +divided into sub-parts are usually passed escaped, as unescaping might +change its semantics. + +The common methods available for all URI are: + +=over 4 + +=item $uri->scheme + +=item $uri->scheme( $new_scheme ) + +Sets and returns the scheme part of the $uri. If the $uri is +relative, then $uri->scheme returns C. If called with an +argument, it updates the scheme of $uri, possibly changing the +class of $uri, and returns the old scheme value. The method croaks +if the new scheme name is illegal; a scheme name must begin with a +letter and must consist of only US-ASCII letters, numbers, and a few +special marks: ".", "+", "-". This restriction effectively means +that the scheme must be passed unescaped. Passing an undefined +argument to the scheme method makes the URI relative (if possible). + +Letter case does not matter for scheme names. The string +returned by $uri->scheme is always lowercase. If you want the scheme +just as it was written in the URI in its original case, +you can use the $uri->_scheme method instead. + +=item $uri->has_recognized_scheme + +Returns TRUE if the URI scheme is one that URI recognizes. + +It will also be TRUE for relative URLs where a recognized +scheme was provided to the constructor, even if C<< $uri->scheme >> +returns C for these. + +=item $uri->opaque + +=item $uri->opaque( $new_opaque ) + +Sets and returns the scheme-specific part of the $uri +(everything between the scheme and the fragment) +as an escaped string. + +=item $uri->path + +=item $uri->path( $new_path ) + +Sets and returns the same value as $uri->opaque unless the URI +supports the generic syntax for hierarchical namespaces. +In that case the generic method is overridden to set and return +the part of the URI between the I and the I. + +=item $uri->fragment + +=item $uri->fragment( $new_frag ) + +Returns the fragment identifier of a URI reference +as an escaped string. + +=item $uri->as_string + +Returns a URI object to a plain ASCII string. URI objects are +also converted to plain strings automatically by overloading. This +means that $uri objects can be used as plain strings in most Perl +constructs. + +=item $uri->as_iri + +Returns a Unicode string representing the URI. Escaped UTF-8 sequences +representing non-ASCII characters are turned into their corresponding Unicode +code point. + +=item $uri->canonical + +Returns a normalized version of the URI. The rules +for normalization are scheme-dependent. They usually involve +lowercasing the scheme and Internet host name components, +removing the explicit port specification if it matches the default port, +uppercasing all escape sequences, and unescaping octets that can be +better represented as plain characters. + +For efficiency reasons, if the $uri is already in normalized form, +then a reference to it is returned instead of a copy. + +=item $uri->eq( $other_uri ) + +=item URI::eq( $first_uri, $other_uri ) + +Tests whether two URI references are equal. URI references +that normalize to the same string are considered equal. The method +can also be used as a plain function which can also test two string +arguments. + +If you need to test whether two C object references denote the +same object, use the '==' operator. + +=item $uri->abs( $base_uri ) + +Returns an absolute URI reference. If $uri is already +absolute, then a reference to it is simply returned. If the $uri +is relative, then a new absolute URI is constructed by combining the +$uri and the $base_uri, and returned. + +=item $uri->rel( $base_uri ) + +Returns a relative URI reference if it is possible to +make one that denotes the same resource relative to $base_uri. +If not, then $uri is simply returned. + +=item $uri->secure + +Returns a TRUE value if the URI is considered to point to a resource on +a secure channel, such as an SSL or TLS encrypted one. + +=back + +=head1 GENERIC METHODS + +The following methods are available to schemes that use the +common/generic syntax for hierarchical namespaces. The descriptions of +schemes below indicate which these are. Unrecognized schemes are +assumed to support the generic syntax, and therefore the following +methods: + +=over 4 + +=item $uri->authority + +=item $uri->authority( $new_authority ) + +Sets and returns the escaped authority component +of the $uri. + +=item $uri->path + +=item $uri->path( $new_path ) + +Sets and returns the escaped path component of +the $uri (the part between the host name and the query or fragment). +The path can never be undefined, but it can be the empty string. + +=item $uri->path_query + +=item $uri->path_query( $new_path_query ) + +Sets and returns the escaped path and query +components as a single entity. The path and the query are +separated by a "?" character, but the query can itself contain "?". + +=item $uri->path_segments + +=item $uri->path_segments( $segment, ... ) + +Sets and returns the path. In a scalar context, it returns +the same value as $uri->path. In a list context, it returns the +unescaped path segments that make up the path. Path segments that +have parameters are returned as an anonymous array. The first element +is the unescaped path segment proper; subsequent elements are escaped +parameter strings. Such an anonymous array uses overloading so it can +be treated as a string too, but this string does not include the +parameters. + +Note that absolute paths have the empty string as their first +I, i.e. the I C have 3 +I; "", "foo" and "bar". + +=item $uri->query + +=item $uri->query( $new_query ) + +Sets and returns the escaped query component of +the $uri. + +=item $uri->query_form + +=item $uri->query_form( $key1 => $val1, $key2 => $val2, ... ) + +=item $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim ) + +=item $uri->query_form( \@key_value_pairs ) + +=item $uri->query_form( \@key_value_pairs, $delim ) + +=item $uri->query_form( \%hash ) + +=item $uri->query_form( \%hash, $delim ) + +Sets and returns query components that use the +I format. Key/value pairs are +separated by "&", and the key is separated from the value by a "=" +character. + +The form can be set either by passing separate key/value pairs, or via +an array or hash reference. Passing an empty array or an empty hash +removes the query component, whereas passing no arguments at all leaves +the component unchanged. The order of keys is undefined if a hash +reference is passed. The old value is always returned as a list of +separate key/value pairs. Assigning this list to a hash is unwise as +the keys returned might repeat. + +The values passed when setting the form can be plain strings or +references to arrays of strings. Passing an array of values has the +same effect as passing the key repeatedly with one value at a time. +All the following statements have the same effect: + + $uri->query_form(foo => 1, foo => 2); + $uri->query_form(foo => [1, 2]); + $uri->query_form([ foo => 1, foo => 2 ]); + $uri->query_form([ foo => [1, 2] ]); + $uri->query_form({ foo => [1, 2] }); + +The $delim parameter can be passed as ";" to force the key/value pairs +to be delimited by ";" instead of "&" in the query string. This +practice is often recommended for URLs embedded in HTML or XML +documents as this avoids the trouble of escaping the "&" character. +You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable to +";" for the same global effect. + +The C module can be loaded to add further methods to +manipulate the form of a URI. See L for details. + +=item $uri->query_keywords + +=item $uri->query_keywords( $keywords, ... ) + +=item $uri->query_keywords( \@keywords ) + +Sets and returns query components that use the +keywords separated by "+" format. + +The keywords can be set either by passing separate keywords directly +or by passing a reference to an array of keywords. Passing an empty +array removes the query component, whereas passing no arguments at +all leaves the component unchanged. The old value is always returned +as a list of separate words. + +=back + +=head1 SERVER METHODS + +For schemes where the I component denotes an Internet host, +the following methods are available in addition to the generic +methods. + +=over 4 + +=item $uri->userinfo + +=item $uri->userinfo( $new_userinfo ) + +Sets and returns the escaped userinfo part of the +authority component. + +For some schemes this is a user name and a password separated by +a colon. This practice is not recommended. Embedding passwords in +clear text (such as URI) has proven to be a security risk in almost +every case where it has been used. + +=item $uri->host + +=item $uri->host( $new_host ) + +Sets and returns the unescaped hostname. + +If the $new_host string ends with a colon and a number, then this +number also sets the port. + +For IPv6 addresses the brackets around the raw address is removed in the return +value from $uri->host. When setting the host attribute to an IPv6 address you +can use a raw address or one enclosed in brackets. The address needs to be +enclosed in brackets if you want to pass in a new port value as well. + +=item $uri->ihost + +Returns the host in Unicode form. Any IDNA A-labels are turned into U-labels. + +=item $uri->port + +=item $uri->port( $new_port ) + +Sets and returns the port. The port is a simple integer +that should be greater than 0. + +If a port is not specified explicitly in the URI, then the URI scheme's default port +is returned. If you don't want the default port +substituted, then you can use the $uri->_port method instead. + +=item $uri->host_port + +=item $uri->host_port( $new_host_port ) + +Sets and returns the host and port as a single +unit. The returned value includes a port, even if it matches the +default port. The host part and the port part are separated by a +colon: ":". + +For IPv6 addresses the bracketing is preserved; thus +URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast this with +$uri->host which will remove the brackets. + +=item $uri->default_port + +Returns the default port of the URI scheme to which $uri +belongs. For I this is the number 80, for I this +is the number 21, etc. The default port for a scheme can not be +changed. + +=back + +=head1 SCHEME-SPECIFIC SUPPORT + +Scheme-specific support is provided for the following URI schemes. For C +objects that do not belong to one of these, you can only use the common and +generic methods. + +=over 4 + +=item B: + +The I URI scheme is specified in RFC 2397. It allows inclusion +of small data items as "immediate" data, as if it had been included +externally. + +C objects belonging to the data scheme support the common methods +and two new methods to access their scheme-specific components: +$uri->media_type and $uri->data. See L for details. + +=item B: + +An old specification of the I URI scheme is found in RFC 1738. +A new RFC 2396 based specification in not available yet, but file URI +references are in common use. + +C objects belonging to the file scheme support the common and +generic methods. In addition, they provide two methods for mapping file URIs +back to local file names; $uri->file and $uri->dir. See L +for details. + +=item B: + +An old specification of the I URI scheme is found in RFC 1738. A +new RFC 2396 based specification in not available yet, but ftp URI +references are in common use. + +C objects belonging to the ftp scheme support the common, +generic and server methods. In addition, they provide two methods for +accessing the userinfo sub-components: $uri->user and $uri->password. + +=item B: + +The I URI scheme is specified in + and will hopefully be available +as a RFC 2396 based specification. + +C objects belonging to the gopher scheme support the common, +generic and server methods. In addition, they support some methods for +accessing gopher-specific path components: $uri->gopher_type, +$uri->selector, $uri->search, $uri->string. + +=item B: + +The I URI scheme is specified in RFC 2616. +The scheme is used to reference resources hosted by HTTP servers. + +C objects belonging to the http scheme support the common, +generic and server methods. + +=item B: + +The I URI scheme is a Netscape invention which is commonly +implemented. The scheme is used to reference HTTP servers through SSL +connections. Its syntax is the same as http, but the default +port is different. + +=item B: + +The I URI scheme is specified in RFC 2255. LDAP is the +Lightweight Directory Access Protocol. An ldap URI describes an LDAP +search operation to perform to retrieve information from an LDAP +directory. + +C objects belonging to the ldap scheme support the common, +generic and server methods as well as ldap-specific methods: $uri->dn, +$uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See +L for details. + +=item B: + +Like the I URI scheme, but uses a UNIX domain socket. The +server methods are not supported, and the local socket path is +available as $uri->un_path. The I scheme is used by the +OpenLDAP package. There is no real specification for it, but it is +mentioned in various OpenLDAP manual pages. + +=item B: + +Like the I URI scheme, but uses an SSL connection. This +scheme is deprecated, as the preferred way is to use the I +mechanism. + +=item B: + +The I URI scheme is specified in RFC 2368. The scheme was +originally used to designate the Internet mailing address of an +individual or service. It has (in RFC 2368) been extended to allow +setting of other mail header fields and the message body. + +C objects belonging to the mailto scheme support the common +methods and the generic query methods. In addition, they support the +following mailto-specific methods: $uri->to, $uri->headers. + +Note that the "foo@example.com" part of a mailto is I the +C and C but instead the C. This allows a +mailto URI to contain multiple comma separated email addresses. + +=item B: + +The I URL specification can be found at L. +C objects belonging to the mms scheme support the common, +generic, and server methods, with the exception of userinfo and +query-related sub-components. + +=item B: + +The I, I and I URI schemes are specified in + and will hopefully be available as an RFC +2396 based specification soon. + +C objects belonging to the news scheme support the common, +generic and server methods. In addition, they provide some methods to +access the path: $uri->group and $uri->message. + +=item B: + +See I scheme. + +=item B: + +The I URI scheme is specified in RFC 2384. The scheme is used to +reference a POP3 mailbox. + +C objects belonging to the pop scheme support the common, generic +and server methods. In addition, they provide two methods to access the +userinfo components: $uri->user and $uri->auth + +=item B: + +An old specification of the I URI scheme is found in RFC +1738. C objects belonging to the rlogin scheme support the +common, generic and server methods. + +=item B: + +The I URL specification can be found in section 3.2 of RFC 2326. +C objects belonging to the rtsp scheme support the common, +generic, and server methods, with the exception of userinfo and +query-related sub-components. + +=item B: + +The I URI scheme is used to talk to RTSP servers over UDP +instead of TCP. The syntax is the same as rtsp. + +=item B: + +Information about rsync is available from L. +C objects belonging to the rsync scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B: + +The I URI specification is described in sections 19.1 and 25 +of RFC 3261. C objects belonging to the sip scheme support the +common, generic, and server methods with the exception of path related +sub-components. In addition, they provide two methods to get and set +I parameters: $uri->params_form and $uri->params. + +=item B: + +See I scheme. Its syntax is the same as sip, but the default +port is different. + +=item B: + +See I scheme. Its syntax is the same as news, but the default +port is different. + +=item B: + +An old specification of the I URI scheme is found in RFC +1738. C objects belonging to the telnet scheme support the +common, generic and server methods. + +=item B: + +These URIs are used like I URIs but for connections to IBM +mainframes. C objects belonging to the tn3270 scheme support the +common, generic and server methods. + +=item B: + +Information about ssh is available at L. +C objects belonging to the ssh scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B: + +C objects belonging to the sftp scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B: + +The syntax of Uniform Resource Names is specified in RFC 2141. C +objects belonging to the urn scheme provide the common methods, and also the +methods $uri->nid and $uri->nss, which return the Namespace Identifier +and the Namespace-Specific String respectively. + +The Namespace Identifier basically works like the Scheme identifier of +URIs, and further divides the URN namespace. Namespace Identifier +assignments are maintained at +L. + +Letter case is not significant for the Namespace Identifier. It is +always returned in lower case by the $uri->nid method. The $uri->_nid +method can be used if you want it in its original case. + +=item B:B: + +The C namespace contains International Standard Book +Numbers (ISBNs) and is described in RFC 3187. A C object belonging +to this namespace has the following extra methods (if the +Business::ISBN module is available): $uri->isbn, +$uri->isbn_publisher_code, $uri->isbn_group_code (formerly isbn_country_code, +which is still supported by issues a deprecation warning), $uri->isbn_as_ean. + +=item B:B: + +The C namespace contains Object Identifiers (OIDs) and is +described in RFC 3061. An object identifier consists of sequences of digits +separated by dots. A C object belonging to this namespace has an +additional method called $uri->oid that can be used to get/set the oid +value. In a list context, oid numbers are returned as separate elements. + +=back + +=head1 CONFIGURATION VARIABLES + +The following configuration variables influence how the class and its +methods behave: + +=over 4 + +=item $URI::ABS_ALLOW_RELATIVE_SCHEME + +Some older parsers used to allow the scheme name to be present in the +relative URL if it was the same as the base URL scheme. RFC 2396 says +that this should be avoided, but you can enable this old behaviour by +setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value. +The difference is demonstrated by the following examples: + + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:foo" + + local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:/host/a/foo" + + +=item $URI::ABS_REMOTE_LEADING_DOTS + +You can also have the abs() method ignore excess ".." +segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS +to a TRUE value. The difference is demonstrated by the following +examples: + + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/../../foo" + + local $URI::ABS_REMOTE_LEADING_DOTS = 1; + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/foo" + +=item $URI::DEFAULT_QUERY_FORM_DELIMITER + +This value can be set to ";" to have the query form C pairs +delimited by ";" instead of "&" which is the default. + +=back + +=head1 BUGS + +There are some things that are not quite right: + +=over + +=item * + +Using regexp variables like $1 directly as arguments to the URI accessor methods +does not work too well with current perl implementations. I would argue +that this is actually a bug in perl. The workaround is to quote +them. Example: + + /(...)/ || die; + $u->query("$1"); + + +=item * + +The escaping (percent encoding) of chars in the 128 .. 255 range passed to the +URI constructor or when setting URI parts using the accessor methods depend on +the state of the internal UTF8 flag (see utf8::is_utf8) of the string passed. +If the UTF8 flag is set the UTF-8 encoded version of the character is percent +encoded. If the UTF8 flag isn't set the Latin-1 version (byte) of the +character is percent encoded. This basically exposes the internal encoding of +Perl strings. + +=back + +=head1 PARSING URIs WITH REGEXP + +As an alternative to this module, the following (official) regular +expression can be used to decode a URI: + + my($scheme, $authority, $path, $query, $fragment) = + $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + +The C module provides the function uri_split() as a +readable alternative. + +=head1 SEE ALSO + +L, L, L, L, +L, L + +RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax", +Berners-Lee, Fielding, Masinter, August 1998. + +L + +L + +L + +=head1 COPYRIGHT + +Copyright 1995-2009 Gisle Aas. + +Copyright 1995 Martijn Koster. + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 AUTHORS / ACKNOWLEDGMENTS + +This module is based on the C module, which in turn was +(distantly) based on the C code in the libwww-perl for +perl4 developed by Roy Fielding, as part of the Arcadia project at the +University of California, Irvine, with contributions from Brooks +Cutter. + +C was developed by Gisle Aas, Tim Bunce, Roy Fielding and +Martijn Koster with input from other people on the libwww-perl mailing +list. + +C and related subclasses was developed by Gisle Aas. + +=cut diff --git a/lib/URI/Escape.pm b/lib/URI/Escape.pm new file mode 100644 index 0000000..30f2b31 --- /dev/null +++ b/lib/URI/Escape.pm @@ -0,0 +1,220 @@ +package URI::Escape; + +use strict; +use warnings; + +=head1 NAME + +URI::Escape - Percent-encode and percent-decode unsafe characters + +=head1 SYNOPSIS + + use URI::Escape; + $safe = uri_escape("10% is enough\n"); + $verysafe = uri_escape("foo", "\0-\377"); + $str = uri_unescape($safe); + +=head1 DESCRIPTION + +This module provides functions to percent-encode and percent-decode URI strings as +defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping". +This is the terminology used by this module, which predates the formalization of the +terms by the RFC by several years. + +A URI consists of a restricted set of characters. The restricted set +of characters consists of digits, letters, and a few graphic symbols +chosen from those common to most of the character encodings and input +facilities available to Internet users. They are made up of the +"unreserved" and "reserved" character sets as defined in RFC 3986. + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@" + "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + +In addition, any byte (octet) can be represented in a URI by an escape +sequence: a triplet consisting of the character "%" followed by two +hexadecimal digits. A byte can also be represented directly by a +character, using the US-ASCII character for that octet. + +Some of the characters are I for use as delimiters or as +part of certain URI components. These must be escaped if they are to +be treated as ordinary data. Read RFC 3986 for further details. + +The functions provided (and exported by default) from this module are: + +=over 4 + +=item uri_escape( $string ) + +=item uri_escape( $string, $unsafe ) + +Replaces each unsafe character in the $string with the corresponding +escape sequence and returns the result. The $string argument should +be a string of bytes. The uri_escape() function will croak if given a +characters with code above 255. Use uri_escape_utf8() if you know you +have such chars or/and want chars in the 128 .. 255 range treated as +UTF-8. + +The uri_escape() function takes an optional second argument that +overrides the set of characters that are to be escaped. The set is +specified as a string that can be used in a regular expression +character class (between [ ]). E.g.: + + "\x00-\x1f\x7f-\xff" # all control and hi-bit characters + "a-z" # all lower case characters + "^A-Za-z" # everything not a letter + +The default set of characters to be escaped is all those which are +I part of the C character class shown above as well +as the reserved characters. I.e. the default is: + + "^A-Za-z0-9\-\._~" + +=item uri_escape_utf8( $string ) + +=item uri_escape_utf8( $string, $unsafe ) + +Works like uri_escape(), but will encode chars as UTF-8 before +escaping them. This makes this function able to deal with characters +with code above 255 in $string. Note that chars in the 128 .. 255 +range will be escaped differently by this function compared to what +uri_escape() would. For chars in the 0 .. 127 range there is no +difference. + +Equivalent to: + + utf8::encode($string); + my $uri = uri_escape($string); + +Note: JavaScript has a function called escape() that produces the +sequence "%uXXXX" for chars in the 256 .. 65535 range. This function +has really nothing to do with URI escaping but some folks got confused +since it "does the right thing" in the 0 .. 255 range. Because of +this you sometimes see "URIs" with these kind of escapes. The +JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). + +=item uri_unescape($string,...) + +Returns a string with each %XX sequence replaced with the actual byte +(octet). + +This does the same as: + + $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + +but does not modify the string in-place as this RE would. Using the +uri_unescape() function instead of the RE might make the code look +cleaner and is a few characters less to type. + +In a simple benchmark test I did, +calling the function (instead of the inline RE above) if a few chars +were unescaped was something like 40% slower, and something like 700% slower if none were. If +you are going to unescape a lot of times it might be a good idea to +inline the RE. + +If the uri_unescape() function is passed multiple strings, then each +one is returned unescaped. + +=back + +The module can also export the C<%escapes> hash, which contains the +mapping from all 256 bytes to the corresponding escape codes. Lookup +in this hash is faster than evaluating C +each time. + +=head1 SEE ALSO + +L + + +=head1 COPYRIGHT + +Copyright 1995-2004 Gisle Aas. + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use Exporter 5.57 'import'; +our %escapes; +our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); +our @EXPORT_OK = qw(%escapes); +our $VERSION = "3.31"; + +use Carp (); + +# Build a char->hex map +for (0..255) { + $escapes{chr($_)} = sprintf("%%%02X", $_); +} + +my %subst; # compiled patterns + +my %Unsafe = ( + RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/, + RFC3986 => qr/[^A-Za-z0-9\-\._~]/, +); + +sub uri_escape { + my($text, $patn) = @_; + return undef unless defined $text; + if (defined $patn){ + unless (exists $subst{$patn}) { + # Because we can't compile the regex we fake it with a cached sub + (my $tmp = $patn) =~ s,/,\\/,g; + eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }"; + Carp::croak("uri_escape: $@") if $@; + } + &{$subst{$patn}}($text); + } else { + $text =~ s/($Unsafe{RFC3986})/$escapes{$1} || _fail_hi($1)/ge; + } + $text; +} + +sub _fail_hi { + my $chr = shift; + Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr)); +} + +sub uri_escape_utf8 { + my $text = shift; + utf8::encode($text); + return uri_escape($text, @_); +} + +sub uri_unescape { + # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + for (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format. +sub escape_char { + # Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1). + # The following forces a fetch to occur beforehand. + my $dummy = substr($_[0], 0, 0); + + if (utf8::is_utf8($_[0])) { + my $s = shift; + utf8::encode($s); + unshift(@_, $s); + } + + return join '', @URI::Escape::escapes{split //, $_[0]}; +} + +1; diff --git a/lib/URI/Heuristic.pm b/lib/URI/Heuristic.pm new file mode 100644 index 0000000..d4ace34 --- /dev/null +++ b/lib/URI/Heuristic.pm @@ -0,0 +1,253 @@ +package URI::Heuristic; + +=head1 NAME + +URI::Heuristic - Expand URI using heuristics + +=head1 SYNOPSIS + + use URI::Heuristic qw(uf_uristr); + $u = uf_uristr("perl"); # http://www.perl.com + $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol + $u = uf_uristr("aas"); # http://www.aas.no + $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi + $u = uf_uristr("/etc/passwd"); # file:/etc/passwd + +=head1 DESCRIPTION + +This module provides functions that expand strings into real absolute +URIs using some built-in heuristics. Strings that already represent +absolute URIs (i.e. that start with a C part) are never modified +and are returned unchanged. The main use of these functions is to +allow abbreviated URIs similar to what many web browsers allow for URIs +typed in by the user. + +The following functions are provided: + +=over 4 + +=item uf_uristr($str) + +Tries to make the argument string +into a proper absolute URI string. The "uf_" prefix stands for "User +Friendly". Under MacOS, it assumes that any string with a common URL +scheme (http, ftp, etc.) is a URL rather than a local path. So don't name +your volumes after common URL schemes and expect uf_uristr() to construct +valid file: URL's on those volumes for you, because it won't. + +=item uf_uri($str) + +Works the same way as uf_uristr() but +returns a C object. + +=back + +=head1 ENVIRONMENT + +If the hostname portion of a URI does not contain any dots, then +certain qualified guesses are made. These guesses are governed by +the following environment variables: + +=over 10 + +=item COUNTRY + +The two-letter country code (ISO 3166) for your location. If +the domain name of your host ends with two letters, then it is taken +to be the default country. See also L. + +=item HTTP_ACCEPT_LANGUAGE, LC_ALL, LANG + +If COUNTRY is not set, these standard environment variables are +examined and country (not language) information possibly found in them +is used as the default country. + +=item URL_GUESS_PATTERN + +Contains a space-separated list of URL patterns to try. The string +"ACME" is for some reason used as a placeholder for the host name in +the URL provided. Example: + + URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com" + export URL_GUESS_PATTERN + +Specifying URL_GUESS_PATTERN disables any guessing rules based on +country. An empty URL_GUESS_PATTERN disables any guessing that +involves host name lookups. + +=back + +=head1 COPYRIGHT + +Copyright 1997-1998, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut + +use strict; +use warnings; + +use Exporter 5.57 'import'; +our @EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr); +our $VERSION = "4.20"; + +our ($MY_COUNTRY, $DEBUG); + +sub MY_COUNTRY() { + for ($MY_COUNTRY) { + return $_ if defined; + + # First try the environment. + $_ = $ENV{COUNTRY}; + return $_ if defined; + + # Try the country part of LC_ALL and LANG from environment + my @srcs = ($ENV{LC_ALL}, $ENV{LANG}); + # ...and HTTP_ACCEPT_LANGUAGE before those if present + if (my $httplang = $ENV{HTTP_ACCEPT_LANGUAGE}) { + # TODO: q-value processing/ordering + for $httplang (split(/\s*,\s*/, $httplang)) { + if ($httplang =~ /^\s*([a-zA-Z]+)[_-]([a-zA-Z]{2})\s*$/) { + unshift(@srcs, "${1}_${2}"); + last; + } + } + } + for (@srcs) { + next unless defined; + return lc($1) if /^[a-zA-Z]+_([a-zA-Z]{2})(?:[.@]|$)/; + } + + # Last bit of domain name. This may access the network. + require Net::Domain; + my $fqdn = Net::Domain::hostfqdn(); + $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/; + return $_ if defined; + + # Give up. Defined but false. + return ($_ = 0); + } +} + +our %LOCAL_GUESSING = +( + 'us' => [qw(www.ACME.gov www.ACME.mil)], + 'gb' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)], + 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)], + 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)], + # send corrections and new entries to +); +# Backwards compatibility; uk != United Kingdom in ISO 3166 +$LOCAL_GUESSING{uk} = $LOCAL_GUESSING{gb}; + + +sub uf_uristr ($) +{ + local($_) = @_; + print STDERR "uf_uristr: resolving $_\n" if $DEBUG; + return unless defined; + + s/^\s+//; + s/\s+$//; + + if (/^(www|web|home)[a-z0-9-]*(?:\.|$)/i) { + $_ = "http://$_"; + + } elsif (/^(ftp|gopher|news|wais|https|http)[a-z0-9-]*(?:\.|$)/i) { + $_ = lc($1) . "://$_"; + + } elsif ($^O ne "MacOS" && + (m,^/, || # absolute file name + m,^\.\.?/, || # relative file name + m,^[a-zA-Z]:[/\\],) # dosish file name + ) + { + $_ = "file:$_"; + + } elsif ($^O eq "MacOS" && m/:/) { + # potential MacOS file name + unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) { + require URI::file; + my $a = URI::file->new($_)->as_string; + $_ = ($a =~ m/^file:/) ? $a : "file:$a"; + } + } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) { + $_ = "mailto:$_"; + + } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified + if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) { + my $host = $1; + + my $scheme = "http"; + if (/^:(\d+)\b/) { + # Some more or less well known ports + if ($1 =~ /^[56789]?443$/) { + $scheme = "https"; + } elsif ($1 eq "21") { + $scheme = "ftp"; + } + } + + if ($host !~ /\./ && $host ne "localhost") { + my @guess; + if (exists $ENV{URL_GUESS_PATTERN}) { + @guess = map { s/\bACME\b/$host/; $_ } + split(' ', $ENV{URL_GUESS_PATTERN}); + } else { + if (MY_COUNTRY()) { + my $special = $LOCAL_GUESSING{MY_COUNTRY()}; + if ($special) { + my @special = @$special; + push(@guess, map { s/\bACME\b/$host/; $_ } + @special); + } else { + push(@guess, "www.$host." . MY_COUNTRY()); + } + } + push(@guess, map "www.$host.$_", + "com", "org", "net", "edu", "int"); + } + + + my $guess; + for $guess (@guess) { + print STDERR "uf_uristr: gethostbyname('$guess.')..." + if $DEBUG; + if (gethostbyname("$guess.")) { + print STDERR "yes\n" if $DEBUG; + $host = $guess; + last; + } + print STDERR "no\n" if $DEBUG; + } + } + $_ = "$scheme://$host$_"; + + } else { + # pure junk, just return it unchanged... + + } + } + print STDERR "uf_uristr: ==> $_\n" if $DEBUG; + + $_; +} + +sub uf_uri ($) +{ + require URI; + URI->new(uf_uristr($_[0])); +} + +# legacy +*uf_urlstr = \*uf_uristr; + +sub uf_url ($) +{ + require URI::URL; + URI::URL->new(uf_uristr($_[0])); +} + +1; diff --git a/lib/URI/IRI.pm b/lib/URI/IRI.pm new file mode 100644 index 0000000..8906399 --- /dev/null +++ b/lib/URI/IRI.pm @@ -0,0 +1,47 @@ +package URI::IRI; + +# Experimental + +use strict; +use warnings; +use URI (); + +use overload '""' => sub { shift->as_string }; + +our $VERSION = "1.69"; + +sub new { + my($class, $uri, $scheme) = @_; + utf8::upgrade($uri); + return bless { + uri => URI->new($uri, $scheme), + }, $class; +} + +sub clone { + my $self = shift; + return bless { + uri => $self->{uri}->clone, + }, ref($self); +} + +sub as_string { + my $self = shift; + return $self->{uri}->as_iri; +} + +our $AUTOLOAD; +sub AUTOLOAD +{ + my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2); + + # We create the function here so that it will not need to be + # autoloaded the next time. + no strict 'refs'; + *$method = sub { shift->{uri}->$method(@_) }; + goto &$method; +} + +sub DESTROY {} # avoid AUTOLOADing it + +1; diff --git a/lib/URI/QueryParam.pm b/lib/URI/QueryParam.pm new file mode 100644 index 0000000..7866f79 --- /dev/null +++ b/lib/URI/QueryParam.pm @@ -0,0 +1,207 @@ +package URI::QueryParam; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +sub URI::_query::query_param { + my $self = shift; + my @old = $self->query_form; + + if (@_ == 0) { + # get keys + my (%seen, $i); + return grep !($i++ % 2 || $seen{$_}++), @old; + } + + my $key = shift; + my @i = grep $_ % 2 == 0 && $old[$_] eq $key, 0 .. $#old; + + if (@_) { + my @new = @old; + my @new_i = @i; + my @vals = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_; + + while (@new_i > @vals) { + splice @new, pop @new_i, 2; + } + if (@vals > @new_i) { + my $i = @new_i ? $new_i[-1] + 2 : @new; + my @splice = splice @vals, @new_i, @vals - @new_i; + + splice @new, $i, 0, map { $key => $_ } @splice; + } + if (@vals) { + #print "SET $new_i[0]\n"; + @new[ map $_ + 1, @new_i ] = @vals; + } + + $self->query_form(\@new); + } + + return wantarray ? @old[map $_+1, @i] : @i ? $old[$i[0]+1] : undef; +} + +sub URI::_query::query_param_append { + my $self = shift; + my $key = shift; + my @vals = map { ref $_ eq 'ARRAY' ? @$_ : $_ } @_; + $self->query_form($self->query_form, $key => \@vals); # XXX + return; +} + +sub URI::_query::query_param_delete { + my $self = shift; + my $key = shift; + my @old = $self->query_form; + my @vals; + + for (my $i = @old - 2; $i >= 0; $i -= 2) { + next if $old[$i] ne $key; + push(@vals, (splice(@old, $i, 2))[1]); + } + $self->query_form(\@old) if @vals; + return wantarray ? reverse @vals : $vals[-1]; +} + +sub URI::_query::query_form_hash { + my $self = shift; + my @old = $self->query_form; + if (@_) { + $self->query_form(@_ == 1 ? %{shift(@_)} : @_); + } + my %hash; + while (my($k, $v) = splice(@old, 0, 2)) { + if (exists $hash{$k}) { + for ($hash{$k}) { + $_ = [$_] unless ref($_) eq "ARRAY"; + push(@$_, $v); + } + } + else { + $hash{$k} = $v; + } + } + return \%hash; +} + +1; + +__END__ + +=head1 NAME + +URI::QueryParam - Additional query methods for URIs + +=head1 SYNOPSIS + + use URI; + use URI::QueryParam; + + $u = URI->new("", "http"); + $u->query_param(foo => 1, 2, 3); + print $u->query; # prints foo=1&foo=2&foo=3 + + for my $key ($u->query_param) { + print "$key: ", join(", ", $u->query_param($key)), "\n"; + } + +=head1 DESCRIPTION + +Loading the C module adds some extra methods to +URIs that support query methods. These methods provide an alternative +interface to the $u->query_form data. + +The query_param_* methods have deliberately been made identical to the +interface of the corresponding C methods. + +The following additional methods are made available: + +=over + +=item @keys = $u->query_param + +=item @values = $u->query_param( $key ) + +=item $first_value = $u->query_param( $key ) + +=item $u->query_param( $key, $value,... ) + +If $u->query_param is called with no arguments, it returns all the +distinct parameter keys of the URI. In a scalar context it returns the +number of distinct keys. + +When a $key argument is given, the method returns the parameter values with the +given key. In a scalar context, only the first parameter value is +returned. + +If additional arguments are given, they are used to update successive +parameters with the given key. If any of the values provided are +array references, then the array is dereferenced to get the actual +values. + +Please note that you can supply multiple values to this method, but you cannot +supply multiple keys. + +Do this: + + $uri->query_param( widget_id => 1, 5, 9 ); + +Do NOT do this: + + $uri->query_param( widget_id => 1, frobnicator_id => 99 ); + +=item $u->query_param_append($key, $value,...) + +Adds new parameters with the given +key without touching any old parameters with the same key. It +can be explained as a more efficient version of: + + $u->query_param($key, + $u->query_param($key), + $value,...); + +One difference is that this expression would return the old values +of $key, whereas the query_param_append() method does not. + +=item @values = $u->query_param_delete($key) + +=item $first_value = $u->query_param_delete($key) + +Deletes all key/value pairs with the given key. +The old values are returned. In a scalar context, only the first value +is returned. + +Using the query_param_delete() method is slightly more efficient than +the equivalent: + + $u->query_param($key, []); + +=item $hashref = $u->query_form_hash + +=item $u->query_form_hash( \%new_form ) + +Returns a reference to a hash that represents the +query form's key/value pairs. If a key occurs multiple times, then the hash +value becomes an array reference. + +Note that sequence information is lost. This means that: + + $u->query_form_hash($u->query_form_hash); + +is not necessarily a no-op, as it may reorder the key/value pairs. +The values returned by the query_param() method should stay the same +though. + +=back + +=head1 SEE ALSO + +L, L + +=head1 COPYRIGHT + +Copyright 2002 Gisle Aas. + +=cut diff --git a/lib/URI/Split.pm b/lib/URI/Split.pm new file mode 100644 index 0000000..6762b3e --- /dev/null +++ b/lib/URI/Split.pm @@ -0,0 +1,97 @@ +package URI::Split; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use Exporter 5.57 'import'; +our @EXPORT_OK = qw(uri_split uri_join); + +use URI::Escape (); + +sub uri_split { + return $_[0] =~ m,(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?,; +} + +sub uri_join { + my($scheme, $auth, $path, $query, $frag) = @_; + my $uri = defined($scheme) ? "$scheme:" : ""; + $path = "" unless defined $path; + if (defined $auth) { + $auth =~ s,([/?\#]), URI::Escape::escape_char($1),eg; + $uri .= "//$auth"; + $path = "/$path" if length($path) && $path !~ m,^/,; + } + elsif ($path =~ m,^//,) { + $uri .= "//"; # XXX force empty auth + } + unless (length $uri) { + $path =~ s,(:), URI::Escape::escape_char($1),e while $path =~ m,^[^:/?\#]+:,; + } + $path =~ s,([?\#]), URI::Escape::escape_char($1),eg; + $uri .= $path; + if (defined $query) { + $query =~ s,(\#), URI::Escape::escape_char($1),eg; + $uri .= "?$query"; + } + $uri .= "#$frag" if defined $frag; + $uri; +} + +1; + +__END__ + +=head1 NAME + +URI::Split - Parse and compose URI strings + +=head1 SYNOPSIS + + use URI::Split qw(uri_split uri_join); + ($scheme, $auth, $path, $query, $frag) = uri_split($uri); + $uri = uri_join($scheme, $auth, $path, $query, $frag); + +=head1 DESCRIPTION + +Provides functions to parse and compose URI +strings. The following functions are provided: + +=over + +=item ($scheme, $auth, $path, $query, $frag) = uri_split($uri) + +Breaks up a URI string into its component +parts. An C value is returned for those parts that are not +present. The $path part is always present (but can be the empty +string) and is thus never returned as C. + +No sensible value is returned if this function is called in a scalar +context. + +=item $uri = uri_join($scheme, $auth, $path, $query, $frag) + +Puts together a URI string from its parts. +Missing parts are signaled by passing C for the corresponding +argument. + +Minimal escaping is applied to parts that contain reserved chars +that would confuse a parser. For instance, any occurrence of '?' or '#' +in $path is always escaped, as it would otherwise be parsed back +as a query or fragment. + +=back + +=head1 SEE ALSO + +L, L + +=head1 COPYRIGHT + +Copyright 2003, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/URI/URL.pm b/lib/URI/URL.pm new file mode 100644 index 0000000..ba05eca --- /dev/null +++ b/lib/URI/URL.pm @@ -0,0 +1,303 @@ +package URI::URL; + +use strict; +use warnings; + +use parent 'URI::WithBase'; + +our $VERSION = "5.04"; + +# Provide as much as possible of the old URI::URL interface for backwards +# compatibility... + +use Exporter 5.57 'import'; +our @EXPORT = qw(url); + +# Easy to use constructor +sub url ($;$) { URI::URL->new(@_); } + +use URI::Escape qw(uri_unescape); + +sub new +{ + my $class = shift; + my $self = $class->SUPER::new(@_); + $self->[0] = $self->[0]->canonical; + $self; +} + +sub newlocal +{ + my $class = shift; + require URI::file; + bless [URI::file->new_abs(shift)], $class; +} + +{package URI::_foreign; + sub _init # hope it is not defined + { + my $class = shift; + die "Unknown URI::URL scheme $_[1]:" if $URI::URL::STRICT; + $class->SUPER::_init(@_); + } +} + +sub strict +{ + my $old = $URI::URL::STRICT; + $URI::URL::STRICT = shift if @_; + $old; +} + +sub print_on +{ + my $self = shift; + require Data::Dumper; + print STDERR Data::Dumper::Dumper($self); +} + +sub _try +{ + my $self = shift; + my $method = shift; + scalar(eval { $self->$method(@_) }); +} + +sub crack +{ + # should be overridden by subclasses + my $self = shift; + (scalar($self->scheme), + $self->_try("user"), + $self->_try("password"), + $self->_try("host"), + $self->_try("port"), + $self->_try("path"), + $self->_try("params"), + $self->_try("query"), + scalar($self->fragment), + ) +} + +sub full_path +{ + my $self = shift; + my $path = $self->path_query; + $path = "/" unless length $path; + $path; +} + +sub netloc +{ + shift->authority(@_); +} + +sub epath +{ + my $path = shift->SUPER::path(@_); + $path =~ s/;.*//; + $path; +} + +sub eparams +{ + my $self = shift; + my @p = $self->path_segments; + return undef unless ref($p[-1]); + @p = @{$p[-1]}; + shift @p; + join(";", @p); +} + +sub params { shift->eparams(@_); } + +sub path { + my $self = shift; + my $old = $self->epath(@_); + return unless defined wantarray; + return '/' if !defined($old) || !length($old); + Carp::croak("Path components contain '/' (you must call epath)") + if $old =~ /%2[fF]/ and !@_; + $old = "/$old" if $old !~ m|^/| && defined $self->netloc; + return uri_unescape($old); +} + +sub path_components { + shift->path_segments(@_); +} + +sub query { + my $self = shift; + my $old = $self->equery(@_); + if (defined(wantarray) && defined($old)) { + if ($old =~ /%(?:26|2[bB]|3[dD])/) { # contains escaped '=' '&' or '+' + my $mess; + for ($old) { + $mess = "Query contains both '+' and '%2B'" + if /\+/ && /%2[bB]/; + $mess = "Form query contains escaped '=' or '&'" + if /=/ && /%(?:3[dD]|26)/; + } + if ($mess) { + Carp::croak("$mess (you must call equery)"); + } + } + # Now it should be safe to unescape the string without losing + # information + return uri_unescape($old); + } + undef; + +} + +sub abs +{ + my $self = shift; + my $base = shift; + my $allow_scheme = shift; + $allow_scheme = $URI::URL::ABS_ALLOW_RELATIVE_SCHEME + unless defined $allow_scheme; + local $URI::ABS_ALLOW_RELATIVE_SCHEME = $allow_scheme; + local $URI::ABS_REMOTE_LEADING_DOTS = $URI::URL::ABS_REMOTE_LEADING_DOTS; + $self->SUPER::abs($base); +} + +sub frag { shift->fragment(@_); } +sub keywords { shift->query_keywords(@_); } + +# file: +sub local_path { shift->file; } +sub unix_path { shift->file("unix"); } +sub dos_path { shift->file("dos"); } +sub mac_path { shift->file("mac"); } +sub vms_path { shift->file("vms"); } + +# mailto: +sub address { shift->to(@_); } +sub encoded822addr { shift->to(@_); } +sub URI::mailto::authority { shift->to(@_); } # make 'netloc' method work + +# news: +sub groupart { shift->_group(@_); } +sub article { shift->message(@_); } + +1; + +__END__ + +=head1 NAME + +URI::URL - Uniform Resource Locators + +=head1 SYNOPSIS + + $u1 = URI::URL->new($str, $base); + $u2 = $u1->abs; + +=head1 DESCRIPTION + +This module is provided for backwards compatibility with modules that +depend on the interface provided by the C class that used to +be distributed with the libwww-perl library. + +The following differences exist compared to the C class interface: + +=over 3 + +=item * + +The URI::URL module exports the url() function as an alternate +constructor interface. + +=item * + +The constructor takes an optional $base argument. The C +class is a subclass of C. + +=item * + +The URI::URL->newlocal class method is the same as URI::file->new_abs. + +=item * + +URI::URL::strict(1) + +=item * + +$url->print_on method + +=item * + +$url->crack method + +=item * + +$url->full_path: same as ($uri->abs_path || "/") + +=item * + +$url->netloc: same as $uri->authority + +=item * + +$url->epath, $url->equery: same as $uri->path, $uri->query + +=item * + +$url->path and $url->query pass unescaped strings. + +=item * + +$url->path_components: same as $uri->path_segments (if you don't +consider path segment parameters) + +=item * + +$url->params and $url->eparams methods + +=item * + +$url->base method. See L. + +=item * + +$url->abs and $url->rel have an optional $base argument. See +L. + +=item * + +$url->frag: same as $uri->fragment + +=item * + +$url->keywords: same as $uri->query_keywords + +=item * + +$url->localpath and friends map to $uri->file. + +=item * + +$url->address and $url->encoded822addr: same as $uri->to for mailto URI + +=item * + +$url->groupart method for news URI + +=item * + +$url->article: same as $uri->message + +=back + + + +=head1 SEE ALSO + +L, L + +=head1 COPYRIGHT + +Copyright 1998-2000 Gisle Aas. + +=cut diff --git a/lib/URI/WithBase.pm b/lib/URI/WithBase.pm new file mode 100644 index 0000000..943b7b5 --- /dev/null +++ b/lib/URI/WithBase.pm @@ -0,0 +1,174 @@ +package URI::WithBase; + +use strict; +use warnings; + +use URI; +use Scalar::Util 'blessed'; + +our $VERSION = "2.20"; + +use overload '""' => "as_string", fallback => 1; + +sub as_string; # help overload find it + +sub new +{ + my($class, $uri, $base) = @_; + my $ibase = $base; + if ($base && blessed($base) && $base->isa(__PACKAGE__)) { + $base = $base->abs; + $ibase = $base->[0]; + } + bless [URI->new($uri, $ibase), $base], $class; +} + +sub new_abs +{ + my $class = shift; + my $self = $class->new(@_); + $self->abs; +} + +sub _init +{ + my $class = shift; + my($str, $scheme) = @_; + bless [URI->new($str, $scheme), undef], $class; +} + +sub eq +{ + my($self, $other) = @_; + $other = $other->[0] if blessed($other) and $other->isa(__PACKAGE__); + $self->[0]->eq($other); +} + +our $AUTOLOAD; +sub AUTOLOAD +{ + my $self = shift; + my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2); + return if $method eq "DESTROY"; + $self->[0]->$method(@_); +} + +sub can { # override UNIVERSAL::can + my $self = shift; + $self->SUPER::can(@_) || ( + ref($self) + ? $self->[0]->can(@_) + : undef + ) +} + +sub base { + my $self = shift; + my $base = $self->[1]; + + if (@_) { # set + my $new_base = shift; + # ensure absoluteness + $new_base = $new_base->abs if ref($new_base) && $new_base->isa(__PACKAGE__); + $self->[1] = $new_base; + } + return unless defined wantarray; + + # The base attribute supports 'lazy' conversion from URL strings + # to URL objects. Strings may be stored but when a string is + # fetched it will automatically be converted to a URL object. + # The main benefit is to make it much cheaper to say: + # URI::WithBase->new($random_url_string, 'http:') + if (defined($base) && !ref($base)) { + $base = ref($self)->new($base); + $self->[1] = $base unless @_; + } + $base; +} + +sub clone +{ + my $self = shift; + my $base = $self->[1]; + $base = $base->clone if ref($base); + bless [$self->[0]->clone, $base], ref($self); +} + +sub abs +{ + my $self = shift; + my $base = shift || $self->base || return $self->clone; + $base = $base->as_string if ref($base); + bless [$self->[0]->abs($base, @_), $base], ref($self); +} + +sub rel +{ + my $self = shift; + my $base = shift || $self->base || return $self->clone; + $base = $base->as_string if ref($base); + bless [$self->[0]->rel($base, @_), $base], ref($self); +} + +1; + +__END__ + +=head1 NAME + +URI::WithBase - URIs which remember their base + +=head1 SYNOPSIS + + $u1 = URI::WithBase->new($str, $base); + $u2 = $u1->abs; + + $base = $u1->base; + $u1->base( $new_base ) + +=head1 DESCRIPTION + +This module provides the C class. Objects of this class +are like C objects, but can keep their base too. The base +represents the context where this URI was found and can be used to +absolutize or relativize the URI. All the methods described in L +are supported for C objects. + +The methods provided in addition to or modified from those of C are: + +=over 4 + +=item $uri = URI::WithBase->new($str, [$base]) + +The constructor takes an optional base URI as the second argument. +If provided, this argument initializes the base attribute. + +=item $uri->base( [$new_base] ) + +Can be used to get or set the value of the base attribute. +The return value, which is the old value, is a URI object or C. + +=item $uri->abs( [$base_uri] ) + +The $base_uri argument is now made optional as the object carries its +base with it. A new object is returned even if $uri is already +absolute (while plain URI objects simply return themselves in +that case). + +=item $uri->rel( [$base_uri] ) + +The $base_uri argument is now made optional as the object carries its +base with it. A new object is always returned. + +=back + + +=head1 SEE ALSO + +L + +=head1 COPYRIGHT + +Copyright 1998-2002 Gisle Aas. + +=cut diff --git a/lib/URI/_foreign.pm b/lib/URI/_foreign.pm new file mode 100644 index 0000000..cad5c8d --- /dev/null +++ b/lib/URI/_foreign.pm @@ -0,0 +1,10 @@ +package URI::_foreign; + +use strict; +use warnings; + +use parent 'URI::_generic'; + +our $VERSION = "1.69"; + +1; diff --git a/lib/URI/_generic.pm b/lib/URI/_generic.pm new file mode 100644 index 0000000..42f6609 --- /dev/null +++ b/lib/URI/_generic.pm @@ -0,0 +1,256 @@ +package URI::_generic; + +use strict; +use warnings; + +use parent qw(URI URI::_query); + +use URI::Escape qw(uri_unescape); +use Carp (); + +our $VERSION = "1.69"; + +my $ACHAR = $URI::uric; $ACHAR =~ s,\\[/?],,g; +my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g; + +sub _no_scheme_ok { 1 } + +sub authority +{ + my $self = shift; + $$self =~ m,^((?:$URI::scheme_re:)?)(?://([^/?\#]*))?(.*)$,os or die; + + if (@_) { + my $auth = shift; + $$self = $1; + my $rest = $3; + if (defined $auth) { + $auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($auth); + $$self .= "//$auth"; + } + _check_path($rest, $$self); + $$self .= $rest; + } + $2; +} + +sub path +{ + my $self = shift; + $$self =~ m,^((?:[^:/?\#]+:)?(?://[^/?\#]*)?)([^?\#]*)(.*)$,s or die; + + if (@_) { + $$self = $1; + my $rest = $3; + my $new_path = shift; + $new_path = "" unless defined $new_path; + $new_path =~ s/([^$PCHAR])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_path); + _check_path($new_path, $$self); + $$self .= $new_path . $rest; + } + $2; +} + +sub path_query +{ + my $self = shift; + $$self =~ m,^((?:[^:/?\#]+:)?(?://[^/?\#]*)?)([^\#]*)(.*)$,s or die; + + if (@_) { + $$self = $1; + my $rest = $3; + my $new_path = shift; + $new_path = "" unless defined $new_path; + $new_path =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_path); + _check_path($new_path, $$self); + $$self .= $new_path . $rest; + } + $2; +} + +sub _check_path +{ + my($path, $pre) = @_; + my $prefix; + if ($pre =~ m,/,) { # authority present + $prefix = "/" if length($path) && $path !~ m,^[/?\#],; + } + else { + if ($path =~ m,^//,) { + Carp::carp("Path starting with double slash is confusing") + if $^W; + } + elsif (!length($pre) && $path =~ m,^[^:/?\#]+:,) { + Carp::carp("Path might look like scheme, './' prepended") + if $^W; + $prefix = "./"; + } + } + substr($_[0], 0, 0) = $prefix if defined $prefix; +} + +sub path_segments +{ + my $self = shift; + my $path = $self->path; + if (@_) { + my @arg = @_; # make a copy + for (@arg) { + if (ref($_)) { + my @seg = @$_; + $seg[0] =~ s/%/%25/g; + for (@seg) { s/;/%3B/g; } + $_ = join(";", @seg); + } + else { + s/%/%25/g; s/;/%3B/g; + } + s,/,%2F,g; + } + $self->path(join("/", @arg)); + } + return $path unless wantarray; + map {/;/ ? $self->_split_segment($_) + : uri_unescape($_) } + split('/', $path, -1); +} + + +sub _split_segment +{ + my $self = shift; + require URI::_segment; + URI::_segment->new(@_); +} + + +sub abs +{ + my $self = shift; + my $base = shift || Carp::croak("Missing base argument"); + + if (my $scheme = $self->scheme) { + return $self unless $URI::ABS_ALLOW_RELATIVE_SCHEME; + $base = URI->new($base) unless ref $base; + return $self unless $scheme eq $base->scheme; + } + + $base = URI->new($base) unless ref $base; + my $abs = $self->clone; + $abs->scheme($base->scheme); + return $abs if $$self =~ m,^(?:$URI::scheme_re:)?//,o; + $abs->authority($base->authority); + + my $path = $self->path; + return $abs if $path =~ m,^/,; + + if (!length($path)) { + my $abs = $base->clone; + my $query = $self->query; + $abs->query($query) if defined $query; + my $fragment = $self->fragment; + $abs->fragment($fragment) if defined $fragment; + return $abs; + } + + my $p = $base->path; + $p =~ s,[^/]+$,,; + $p .= $path; + my @p = split('/', $p, -1); + shift(@p) if @p && !length($p[0]); + my $i = 1; + while ($i < @p) { + #print "$i ", join("/", @p), " ($p[$i])\n"; + if ($p[$i-1] eq ".") { + splice(@p, $i-1, 1); + $i-- if $i > 1; + } + elsif ($p[$i] eq ".." && $p[$i-1] ne "..") { + splice(@p, $i-1, 2); + if ($i > 1) { + $i--; + push(@p, "") if $i == @p; + } + } + else { + $i++; + } + } + $p[-1] = "" if @p && $p[-1] eq "."; # trailing "/." + if ($URI::ABS_REMOTE_LEADING_DOTS) { + shift @p while @p && $p[0] =~ /^\.\.?$/; + } + $abs->path("/" . join("/", @p)); + $abs; +} + +# The opposite of $url->abs. Return a URI which is as relative as possible +sub rel { + my $self = shift; + my $base = shift || Carp::croak("Missing base argument"); + my $rel = $self->clone; + $base = URI->new($base) unless ref $base; + + #my($scheme, $auth, $path) = @{$rel}{qw(scheme authority path)}; + my $scheme = $rel->scheme; + my $auth = $rel->canonical->authority; + my $path = $rel->path; + + if (!defined($scheme) && !defined($auth)) { + # it is already relative + return $rel; + } + + #my($bscheme, $bauth, $bpath) = @{$base}{qw(scheme authority path)}; + my $bscheme = $base->scheme; + my $bauth = $base->canonical->authority; + my $bpath = $base->path; + + for ($bscheme, $bauth, $auth) { + $_ = '' unless defined + } + + unless ($scheme eq $bscheme && $auth eq $bauth) { + # different location, can't make it relative + return $rel; + } + + for ($path, $bpath) { $_ = "/$_" unless m,^/,; } + + # Make it relative by eliminating scheme and authority + $rel->scheme(undef); + $rel->authority(undef); + + # This loop is based on code from Nicolai Langfeldt . + # First we calculate common initial path components length ($li). + my $li = 1; + while (1) { + my $i = index($path, '/', $li); + last if $i < 0 || + $i != index($bpath, '/', $li) || + substr($path,$li,$i-$li) ne substr($bpath,$li,$i-$li); + $li=$i+1; + } + # then we nuke it from both paths + substr($path, 0,$li) = ''; + substr($bpath,0,$li) = ''; + + if ($path eq $bpath && + defined($rel->fragment) && + !defined($rel->query)) { + $rel->path(""); + } + else { + # Add one "../" for each path component left in the base path + $path = ('../' x $bpath =~ tr|/|/|) . $path; + $path = "./" if $path eq ""; + $rel->path($path); + } + + $rel; +} + +1; diff --git a/lib/URI/_idna.pm b/lib/URI/_idna.pm new file mode 100644 index 0000000..ce58db8 --- /dev/null +++ b/lib/URI/_idna.pm @@ -0,0 +1,91 @@ +package URI::_idna; + +# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) +# based on Python-2.6.4/Lib/encodings/idna.py + +use strict; +use warnings; + +use URI::_punycode qw(encode_punycode decode_punycode); +use Carp qw(croak); + +our $VERSION = "1.69"; + +BEGIN { + *URI::_idna::_ENV_::JOIN_LEAKS_UTF8_FLAGS = $] < 5.008_003 + ? sub () { 1 } + : sub () { 0 } + ; +} + +my $ASCII = qr/^[\x00-\x7F]*\z/; + +sub encode { + my $idomain = shift; + my @labels = split(/\./, $idomain, -1); + my @last_empty; + push(@last_empty, pop @labels) if @labels > 1 && $labels[-1] eq ""; + for (@labels) { + $_ = ToASCII($_); + } + + return eval 'join(".", @labels, @last_empty)' if URI::_idna::_ENV_::JOIN_LEAKS_UTF8_FLAGS; + return join(".", @labels, @last_empty); +} + +sub decode { + my $domain = shift; + return join(".", map ToUnicode($_), split(/\./, $domain, -1)) +} + +sub nameprep { # XXX real implementation missing + my $label = shift; + $label = lc($label); + return $label; +} + +sub check_size { + my $label = shift; + croak "Label empty" if $label eq ""; + croak "Label too long" if length($label) > 63; + return $label; +} + +sub ToASCII { + my $label = shift; + return check_size($label) if $label =~ $ASCII; + + # Step 2: nameprep + $label = nameprep($label); + # Step 3: UseSTD3ASCIIRules is false + # Step 4: try ASCII again + return check_size($label) if $label =~ $ASCII; + + # Step 5: Check ACE prefix + if ($label =~ /^xn--/) { + croak "Label starts with ACE prefix"; + } + + # Step 6: Encode with PUNYCODE + $label = encode_punycode($label); + + # Step 7: Prepend ACE prefix + $label = "xn--$label"; + + # Step 8: Check size + return check_size($label); +} + +sub ToUnicode { + my $label = shift; + $label = nameprep($label) unless $label =~ $ASCII; + return $label unless $label =~ /^xn--/; + my $result = decode_punycode(substr($label, 4)); + my $label2 = ToASCII($result); + if (lc($label) ne $label2) { + croak "IDNA does not round-trip: '\L$label\E' vs '$label2'"; + } + return $result; +} + +1; diff --git a/lib/URI/_ldap.pm b/lib/URI/_ldap.pm new file mode 100644 index 0000000..02f468c --- /dev/null +++ b/lib/URI/_ldap.pm @@ -0,0 +1,140 @@ +# Copyright (c) 1998 Graham Barr . All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package URI::_ldap; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use URI::Escape qw(uri_unescape); + +sub _ldap_elem { + my $self = shift; + my $elem = shift; + my $query = $self->query; + my @bits = (split(/\?/,defined($query) ? $query : ""),("")x4); + my $old = $bits[$elem]; + + if (@_) { + my $new = shift; + $new =~ s/\?/%3F/g; + $bits[$elem] = $new; + $query = join("?",@bits); + $query =~ s/\?+$//; + $query = undef unless length($query); + $self->query($query); + } + + $old; +} + +sub dn { + my $old = shift->path(@_); + $old =~ s:^/::; + uri_unescape($old); +} + +sub attributes { + my $self = shift; + my $old = _ldap_elem($self,0, @_ ? join(",", map { my $tmp = $_; $tmp =~ s/,/%2C/g; $tmp } @_) : ()); + return $old unless wantarray; + map { uri_unescape($_) } split(/,/,$old); +} + +sub _scope { + my $self = shift; + my $old = _ldap_elem($self,1, @_); + return undef unless defined wantarray && defined $old; + uri_unescape($old); +} + +sub scope { + my $old = &_scope; + $old = "base" unless length $old; + $old; +} + +sub _filter { + my $self = shift; + my $old = _ldap_elem($self,2, @_); + return undef unless defined wantarray && defined $old; + uri_unescape($old); # || "(objectClass=*)"; +} + +sub filter { + my $old = &_filter; + $old = "(objectClass=*)" unless length $old; + $old; +} + +sub extensions { + my $self = shift; + my @ext; + while (@_) { + my $key = shift; + my $value = shift; + push(@ext, join("=", map { $_="" unless defined; s/,/%2C/g; $_ } $key, $value)); + } + @ext = join(",", @ext) if @ext; + my $old = _ldap_elem($self,3, @ext); + return $old unless wantarray; + map { uri_unescape($_) } map { /^([^=]+)=(.*)$/ } split(/,/,$old); +} + +sub canonical +{ + my $self = shift; + my $other = $self->_nonldap_canonical; + + # The stuff below is not as efficient as one might hope... + + $other = $other->clone if $other == $self; + + $other->dn(_normalize_dn($other->dn)); + + # Should really know about mixed case "postalAddress", etc... + $other->attributes(map lc, $other->attributes); + + # Lowercase scope, remove default + my $old_scope = $other->scope; + my $new_scope = lc($old_scope); + $new_scope = "" if $new_scope eq "base"; + $other->scope($new_scope) if $new_scope ne $old_scope; + + # Remove filter if default + my $old_filter = $other->filter; + $other->filter("") if lc($old_filter) eq "(objectclass=*)" || + lc($old_filter) eq "objectclass=*"; + + # Lowercase extensions types and deal with known extension values + my @ext = $other->extensions; + for (my $i = 0; $i < @ext; $i += 2) { + my $etype = $ext[$i] = lc($ext[$i]); + if ($etype =~ /^!?bindname$/) { + $ext[$i+1] = _normalize_dn($ext[$i+1]); + } + } + $other->extensions(@ext) if @ext; + + $other; +} + +sub _normalize_dn # RFC 2253 +{ + my $dn = shift; + + return $dn; + # The code below will fail if the "+" or "," is embedding in a quoted + # string or simply escaped... + + my @dn = split(/([+,])/, $dn); + for (@dn) { + s/^([a-zA-Z]+=)/lc($1)/e; + } + join("", @dn); +} + +1; diff --git a/lib/URI/_login.pm b/lib/URI/_login.pm new file mode 100644 index 0000000..ef58e24 --- /dev/null +++ b/lib/URI/_login.pm @@ -0,0 +1,13 @@ +package URI::_login; + +use strict; +use warnings; + +use parent qw(URI::_server URI::_userpass); + +our $VERSION = "1.69"; + +# Generic terminal logins. This is used as a base class for 'telnet', +# 'tn3270', and 'rlogin' URL schemes. + +1; diff --git a/lib/URI/_punycode.pm b/lib/URI/_punycode.pm new file mode 100644 index 0000000..f54eee5 --- /dev/null +++ b/lib/URI/_punycode.pm @@ -0,0 +1,203 @@ +package URI::_punycode; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use Exporter 'import'; +our @EXPORT = qw(encode_punycode decode_punycode); + +use integer; + +our $DEBUG = 0; + +use constant BASE => 36; +use constant TMIN => 1; +use constant TMAX => 26; +use constant SKEW => 38; +use constant DAMP => 700; +use constant INITIAL_BIAS => 72; +use constant INITIAL_N => 128; + +my $Delimiter = chr 0x2D; +my $BasicRE = qr/[\x00-\x7f]/; + +sub _croak { require Carp; Carp::croak(@_); } + +sub digit_value { + my $code = shift; + return ord($code) - ord("A") if $code =~ /[A-Z]/; + return ord($code) - ord("a") if $code =~ /[a-z]/; + return ord($code) - ord("0") + 26 if $code =~ /[0-9]/; + return; +} + +sub code_point { + my $digit = shift; + return $digit + ord('a') if 0 <= $digit && $digit <= 25; + return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 36; + die 'NOT COME HERE'; +} + +sub adapt { + my($delta, $numpoints, $firsttime) = @_; + $delta = $firsttime ? $delta / DAMP : $delta / 2; + $delta += $delta / $numpoints; + my $k = 0; + while ($delta > ((BASE - TMIN) * TMAX) / 2) { + $delta /= BASE - TMIN; + $k += BASE; + } + return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW)); +} + +sub decode_punycode { + my $code = shift; + + my $n = INITIAL_N; + my $i = 0; + my $bias = INITIAL_BIAS; + my @output; + + if ($code =~ s/(.*)$Delimiter//o) { + push @output, map ord, split //, $1; + return _croak('non-basic code point') unless $1 =~ /^$BasicRE*$/o; + } + + while ($code) { + my $oldi = $i; + my $w = 1; + LOOP: + for (my $k = BASE; 1; $k += BASE) { + my $cp = substr($code, 0, 1, ''); + my $digit = digit_value($cp); + defined $digit or return _croak("invalid punycode input"); + $i += $digit * $w; + my $t = ($k <= $bias) ? TMIN + : ($k >= $bias + TMAX) ? TMAX : $k - $bias; + last LOOP if $digit < $t; + $w *= (BASE - $t); + } + $bias = adapt($i - $oldi, @output + 1, $oldi == 0); + warn "bias becomes $bias" if $DEBUG; + $n += $i / (@output + 1); + $i = $i % (@output + 1); + splice(@output, $i, 0, $n); + warn join " ", map sprintf('%04x', $_), @output if $DEBUG; + $i++; + } + return join '', map chr, @output; +} + +sub encode_punycode { + my $input = shift; + my @input = split //, $input; + + my $n = INITIAL_N; + my $delta = 0; + my $bias = INITIAL_BIAS; + + my @output; + my @basic = grep /$BasicRE/, @input; + my $h = my $b = @basic; + push @output, @basic; + push @output, $Delimiter if $b && $h < @input; + warn "basic codepoints: (@output)" if $DEBUG; + + while ($h < @input) { + my $m = min(grep { $_ >= $n } map ord, @input); + warn sprintf "next code point to insert is %04x", $m if $DEBUG; + $delta += ($m - $n) * ($h + 1); + $n = $m; + for my $i (@input) { + my $c = ord($i); + $delta++ if $c < $n; + if ($c == $n) { + my $q = $delta; + LOOP: + for (my $k = BASE; 1; $k += BASE) { + my $t = ($k <= $bias) ? TMIN : + ($k >= $bias + TMAX) ? TMAX : $k - $bias; + last LOOP if $q < $t; + my $cp = code_point($t + (($q - $t) % (BASE - $t))); + push @output, chr($cp); + $q = ($q - $t) / (BASE - $t); + } + push @output, chr(code_point($q)); + $bias = adapt($delta, $h + 1, $h == $b); + warn "bias becomes $bias" if $DEBUG; + $delta = 0; + $h++; + } + } + $delta++; + $n++; + } + return join '', @output; +} + +sub min { + my $min = shift; + for (@_) { $min = $_ if $_ <= $min } + return $min; +} + +1; +__END__ + +=head1 NAME + +URI::_punycode - encodes Unicode string in Punycode + +=head1 SYNOPSIS + + use URI::_punycode; + $punycode = encode_punycode($unicode); + $unicode = decode_punycode($punycode); + +=head1 DESCRIPTION + +URI::_punycode is a module to encode / decode Unicode strings into +Punycode, an efficient encoding of Unicode for use with IDNA. + +This module requires Perl 5.6.0 or over to handle UTF8 flagged Unicode +strings. + +=head1 FUNCTIONS + +This module exports following functions by default. + +=over 4 + +=item encode_punycode + + $punycode = encode_punycode($unicode); + +takes Unicode string (UTF8-flagged variable) and returns Punycode +encoding for it. + +=item decode_punycode + + $unicode = decode_punycode($punycode) + +takes Punycode encoding and returns original Unicode string. + +=back + +These functions throw exceptions on failure. You can catch 'em via +C. + +=head1 AUTHOR + +Tatsuhiko Miyagawa Emiyagawa@bulknews.netE is the author of +IDNA::Punycode v0.02 which was the basis for this module. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 SEE ALSO + +L, RFC 3492 + +=cut diff --git a/lib/URI/_query.pm b/lib/URI/_query.pm new file mode 100644 index 0000000..a5976a3 --- /dev/null +++ b/lib/URI/_query.pm @@ -0,0 +1,97 @@ +package URI::_query; + +use strict; +use warnings; + +use URI (); +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub query +{ + my $self = shift; + $$self =~ m,^([^?\#]*)(?:\?([^\#]*))?(.*)$,s or die; + + if (@_) { + my $q = shift; + $$self = $1; + if (defined $q) { + $q =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($q); + $$self .= "?$q"; + } + $$self .= $3; + } + $2; +} + +# Handle ...?foo=bar&bar=foo type of query +sub query_form { + my $self = shift; + my $old = $self->query; + if (@_) { + # Try to set query string + my $delim; + my $r = $_[0]; + if (ref($r) eq "ARRAY") { + $delim = $_[1]; + @_ = @$r; + } + elsif (ref($r) eq "HASH") { + $delim = $_[1]; + @_ = map { $_ => $r->{$_} } sort keys %$r; + } + $delim = pop if @_ % 2; + + my @query; + while (my($key,$vals) = splice(@_, 0, 2)) { + $key = '' unless defined $key; + $key =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; + $key =~ s/ /+/g; + $vals = [ref($vals) eq "ARRAY" ? @$vals : $vals]; + for my $val (@$vals) { + $val = '' unless defined $val; + $val =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; + $val =~ s/ /+/g; + push(@query, "$key=$val"); + } + } + if (@query) { + unless ($delim) { + $delim = $1 if $old && $old =~ /([&;])/; + $delim ||= $URI::DEFAULT_QUERY_FORM_DELIMITER || "&"; + } + $self->query(join($delim, @query)); + } + else { + $self->query(undef); + } + } + return if !defined($old) || !length($old) || !defined(wantarray); + return unless $old =~ /=/; # not a form + map { s/\+/ /g; uri_unescape($_) } + map { /=/ ? split(/=/, $_, 2) : ($_ => '')} split(/[&;]/, $old); +} + +# Handle ...?dog+bones type of query +sub query_keywords +{ + my $self = shift; + my $old = $self->query; + if (@_) { + # Try to set query string + my @copy = @_; + @copy = @{$copy[0]} if @copy == 1 && ref($copy[0]) eq "ARRAY"; + for (@copy) { s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; } + $self->query(@copy ? join('+', @copy) : undef); + } + return if !defined($old) || !defined(wantarray); + return if $old =~ /=/; # not keywords, but a form + map { uri_unescape($_) } split(/\+/, $old, -1); +} + +# Some URI::URL compatibility stuff +sub equery { goto &query } + +1; diff --git a/lib/URI/_segment.pm b/lib/URI/_segment.pm new file mode 100644 index 0000000..3b27289 --- /dev/null +++ b/lib/URI/_segment.pm @@ -0,0 +1,24 @@ +package URI::_segment; + +# Represents a generic path_segment so that it can be treated as +# a string too. + +use strict; +use warnings; + +use URI::Escape qw(uri_unescape); + +use overload '""' => sub { $_[0]->[0] }, + fallback => 1; + +our $VERSION = "1.69"; + +sub new +{ + my $class = shift; + my @segment = split(';', shift, -1); + $segment[0] = uri_unescape($segment[0]); + bless \@segment, $class; +} + +1; diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm new file mode 100644 index 0000000..4df158e --- /dev/null +++ b/lib/URI/_server.pm @@ -0,0 +1,166 @@ +package URI::_server; + +use strict; +use warnings; + +use parent 'URI::_generic'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _uric_escape { + my($class, $str) = @_; + if ($str =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { + my($scheme, $host, $rest) = ($1, $2, $3); + my $ui = $host =~ s/(.*@)// ? $1 : ""; + my $port = $host =~ s/(:\d+)\z// ? $1 : ""; + if (_host_escape($host)) { + $str = "$scheme//$ui$host$port$rest"; + } + } + return $class->SUPER::_uric_escape($str); +} + +sub _host_escape { + return unless $_[0] =~ /[^$URI::uric]/; + eval { + require URI::_idna; + $_[0] = URI::_idna::encode($_[0]); + }; + return 0 if $@; + return 1; +} + +sub as_iri { + my $self = shift; + my $str = $self->SUPER::as_iri; + if ($str =~ /\bxn--/) { + if ($str =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { + my($scheme, $host, $rest) = ($1, $2, $3); + my $ui = $host =~ s/(.*@)// ? $1 : ""; + my $port = $host =~ s/(:\d+)\z// ? $1 : ""; + require URI::_idna; + $host = URI::_idna::decode($host); + $str = "$scheme//$ui$host$port$rest"; + } + } + return $str; +} + +sub userinfo +{ + my $self = shift; + my $old = $self->authority; + + if (@_) { + my $new = $old; + $new = "" unless defined $new; + $new =~ s/.*@//; # remove old stuff + my $ui = shift; + if (defined $ui) { + $ui =~ s/@/%40/g; # protect @ + $new = "$ui\@$new"; + } + $self->authority($new); + } + return undef if !defined($old) || $old !~ /(.*)@/; + return $1; +} + +sub host +{ + my $self = shift; + my $old = $self->authority; + if (@_) { + my $tmp = $old; + $tmp = "" unless defined $tmp; + my $ui = ($tmp =~ /(.*@)/) ? $1 : ""; + my $port = ($tmp =~ /(:\d+)$/) ? $1 : ""; + my $new = shift; + $new = "" unless defined $new; + if (length $new) { + $new =~ s/[@]/%40/g; # protect @ + if ($new =~ /^[^:]*:\d*\z/ || $new =~ /]:\d*\z/) { + $new =~ s/(:\d*)\z// || die "Assert"; + $port = $1; + } + $new = "[$new]" if $new =~ /:/ && $new !~ /^\[/; # IPv6 address + _host_escape($new); + } + $self->authority("$ui$new$port"); + } + return undef unless defined $old; + $old =~ s/.*@//; + $old =~ s/:\d+$//; # remove the port + $old =~ s{^\[(.*)\]$}{$1}; # remove brackets around IPv6 (RFC 3986 3.2.2) + return uri_unescape($old); +} + +sub ihost +{ + my $self = shift; + my $old = $self->host(@_); + if ($old =~ /(^|\.)xn--/) { + require URI::_idna; + $old = URI::_idna::decode($old); + } + return $old; +} + +sub _port +{ + my $self = shift; + my $old = $self->authority; + if (@_) { + my $new = $old; + $new =~ s/:\d*$//; + my $port = shift; + $new .= ":$port" if defined $port; + $self->authority($new); + } + return $1 if defined($old) && $old =~ /:(\d*)$/; + return; +} + +sub port +{ + my $self = shift; + my $port = $self->_port(@_); + $port = $self->default_port if !defined($port) || $port eq ""; + $port; +} + +sub host_port +{ + my $self = shift; + my $old = $self->authority; + $self->host(shift) if @_; + return undef unless defined $old; + $old =~ s/.*@//; # zap userinfo + $old =~ s/:$//; # empty port should be treated the same a no port + $old .= ":" . $self->port unless $old =~ /:\d+$/; + $old; +} + + +sub default_port { undef } + +sub canonical +{ + my $self = shift; + my $other = $self->SUPER::canonical; + my $host = $other->host || ""; + my $port = $other->_port; + my $uc_host = $host =~ /[A-Z]/; + my $def_port = defined($port) && ($port eq "" || + $port == $self->default_port); + if ($uc_host || $def_port) { + $other = $other->clone if $other == $self; + $other->host(lc $host) if $uc_host; + $other->port(undef) if $def_port; + } + $other; +} + +1; diff --git a/lib/URI/_userpass.pm b/lib/URI/_userpass.pm new file mode 100644 index 0000000..6d260c0 --- /dev/null +++ b/lib/URI/_userpass.pm @@ -0,0 +1,55 @@ +package URI::_userpass; + +use strict; +use warnings; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub user +{ + my $self = shift; + my $info = $self->userinfo; + if (@_) { + my $new = shift; + my $pass = defined($info) ? $info : ""; + $pass =~ s/^[^:]*//; + + if (!defined($new) && !length($pass)) { + $self->userinfo(undef); + } else { + $new = "" unless defined($new); + $new =~ s/%/%25/g; + $new =~ s/:/%3A/g; + $self->userinfo("$new$pass"); + } + } + return undef unless defined $info; + $info =~ s/:.*//; + uri_unescape($info); +} + +sub password +{ + my $self = shift; + my $info = $self->userinfo; + if (@_) { + my $new = shift; + my $user = defined($info) ? $info : ""; + $user =~ s/:.*//; + + if (!defined($new) && !length($user)) { + $self->userinfo(undef); + } else { + $new = "" unless defined($new); + $new =~ s/%/%25/g; + $self->userinfo("$user:$new"); + } + } + return undef unless defined $info; + return undef unless $info =~ s/^[^:]*://; + uri_unescape($info); +} + +1; diff --git a/lib/URI/data.pm b/lib/URI/data.pm new file mode 100644 index 0000000..7848502 --- /dev/null +++ b/lib/URI/data.pm @@ -0,0 +1,142 @@ +package URI::data; # RFC 2397 + +use strict; +use warnings; + +use parent 'URI'; + +our $VERSION = '1.69'; + +use MIME::Base64 qw(encode_base64 decode_base64); +use URI::Escape qw(uri_unescape); + +sub media_type +{ + my $self = shift; + my $opaque = $self->opaque; + $opaque =~ /^([^,]*),?/ or die; + my $old = $1; + my $base64; + $base64 = $1 if $old =~ s/(;base64)$//i; + if (@_) { + my $new = shift; + $new = "" unless defined $new; + $new =~ s/%/%25/g; + $new =~ s/,/%2C/g; + $base64 = "" unless defined $base64; + $opaque =~ s/^[^,]*,?/$new$base64,/; + $self->opaque($opaque); + } + return uri_unescape($old) if $old; # media_type can't really be "0" + "text/plain;charset=US-ASCII"; # default type +} + +sub data +{ + my $self = shift; + my($enc, $data) = split(",", $self->opaque, 2); + unless (defined $data) { + $data = ""; + $enc = "" unless defined $enc; + } + my $base64 = ($enc =~ /;base64$/i); + if (@_) { + $enc =~ s/;base64$//i if $base64; + my $new = shift; + $new = "" unless defined $new; + my $uric_count = _uric_count($new); + my $urienc_len = $uric_count + (length($new) - $uric_count) * 3; + my $base64_len = int((length($new)+2) / 3) * 4; + $base64_len += 7; # because of ";base64" marker + if ($base64_len < $urienc_len || $_[0]) { + $enc .= ";base64"; + $new = encode_base64($new, ""); + } else { + $new =~ s/%/%25/g; + } + $self->opaque("$enc,$new"); + } + return unless defined wantarray; + $data = uri_unescape($data); + return $base64 ? decode_base64($data) : $data; +} + +# I could not find a better way to interpolate the tr/// chars from +# a variable. +my $ENC = $URI::uric; +$ENC =~ s/%//; + +eval <new("data:"); + $u->media_type("image/gif"); + $u->data(scalar(`cat camel.gif`)); + print "$u\n"; + open(XV, "|xv -") and print XV $u->data; + +=head1 DESCRIPTION + +The C class supports C objects belonging to the I +URI scheme. The I URI scheme is specified in RFC 2397. It +allows inclusion of small data items as "immediate" data, as if it had +been included externally. Examples: + + data:,Perl%20is%20good + + data:image/gif;base64,R0lGODdhIAAgAIAAAAAAAPj8+CwAAAAAI + AAgAAAClYyPqcu9AJyCjtIKc5w5xP14xgeO2tlY3nWcajmZZdeJcG + Kxrmimms1KMTa1Wg8UROx4MNUq1HrycMjHT9b6xKxaFLM6VRKzI+p + KS9XtXpcbdun6uWVxJXA8pNPkdkkxhxc21LZHFOgD2KMoQXa2KMWI + JtnE2KizVUkYJVZZ1nczBxXlFopZBtoJ2diXGdNUymmJdFMAADs= + + + +C objects belonging to the data scheme support the common methods +(described in L) and the following two scheme-specific methods: + +=over 4 + +=item $uri->media_type( [$new_media_type] ) + +Can be used to get or set the media type specified in the +URI. If no media type is specified, then the default +C<"text/plain;charset=US-ASCII"> is returned. + +=item $uri->data( [$new_data] ) + +Can be used to get or set the data contained in the URI. +The data is passed unescaped (in binary form). The decision about +whether to base64 encode the data in the URI is taken automatically, +based on the encoding that produces the shorter URI string. + +=back + +=head1 SEE ALSO + +L + +=head1 COPYRIGHT + +Copyright 1995-1998 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/URI/file.pm b/lib/URI/file.pm new file mode 100644 index 0000000..d76ddf2 --- /dev/null +++ b/lib/URI/file.pm @@ -0,0 +1,327 @@ +package URI::file; + +use strict; +use warnings; + +use parent 'URI::_generic'; +our $VERSION = "4.21"; + +use URI::Escape qw(uri_unescape); + +our $DEFAULT_AUTHORITY = ""; + +# Map from $^O values to implementation classes. The Unix +# class is the default. +our %OS_CLASS = ( + os2 => "OS2", + mac => "Mac", + MacOS => "Mac", + MSWin32 => "Win32", + win32 => "Win32", + msdos => "FAT", + dos => "FAT", + qnx => "QNX", +); + +sub os_class +{ + my($OS) = shift || $^O; + + my $class = "URI::file::" . ($OS_CLASS{$OS} || "Unix"); + no strict 'refs'; + unless (%{"$class\::"}) { + eval "require $class"; + die $@ if $@; + } + $class; +} + +sub host { uri_unescape(shift->authority(@_)) } + +sub new +{ + my($class, $path, $os) = @_; + os_class($os)->new($path); +} + +sub new_abs +{ + my $class = shift; + my $file = $class->new(@_); + return $file->abs($class->cwd) unless $$file =~ /^file:/; + $file; +} + +sub cwd +{ + my $class = shift; + require Cwd; + my $cwd = Cwd::cwd(); + $cwd = VMS::Filespec::unixpath($cwd) if $^O eq 'VMS'; + $cwd = $class->new($cwd); + $cwd .= "/" unless substr($cwd, -1, 1) eq "/"; + $cwd; +} + +sub canonical { + my $self = shift; + my $other = $self->SUPER::canonical; + + my $scheme = $other->scheme; + my $auth = $other->authority; + return $other if !defined($scheme) && !defined($auth); # relative + + if (!defined($auth) || + $auth eq "" || + lc($auth) eq "localhost" || + (defined($DEFAULT_AUTHORITY) && lc($auth) eq lc($DEFAULT_AUTHORITY)) + ) + { + # avoid cloning if $auth already match + if ((defined($auth) || defined($DEFAULT_AUTHORITY)) && + (!defined($auth) || !defined($DEFAULT_AUTHORITY) || $auth ne $DEFAULT_AUTHORITY) + ) + { + $other = $other->clone if $self == $other; + $other->authority($DEFAULT_AUTHORITY); + } + } + + $other; +} + +sub file +{ + my($self, $os) = @_; + os_class($os)->file($self); +} + +sub dir +{ + my($self, $os) = @_; + os_class($os)->dir($self); +} + +1; + +__END__ + +=head1 NAME + +URI::file - URI that maps to local file names + +=head1 SYNOPSIS + + use URI::file; + + $u1 = URI->new("file:/foo/bar"); + $u2 = URI->new("foo/bar", "file"); + + $u3 = URI::file->new($path); + $u4 = URI::file->new("c:\\windows\\", "win32"); + + $u1->file; + $u1->file("mac"); + +=head1 DESCRIPTION + +The C class supports C objects belonging to the I +URI scheme. This scheme allows us to map the conventional file names +found on various computer systems to the URI name space. An old +specification of the I URI scheme is found in RFC 1738. Some +older background information is also in RFC 1630. There are no newer +specifications as far as I know. + +If you simply want to construct I URI objects from URI strings, +use the normal C constructor. If you want to construct I +URI objects from the actual file names used by various systems, then +use one of the following C constructors: + +=over 4 + +=item $u = URI::file->new( $filename, [$os] ) + +Maps a file name to the I URI name space, creates a URI object +and returns it. The $filename is interpreted as belonging to the +indicated operating system ($os), which defaults to the value of the +$^O variable. The $filename can be either absolute or relative, and +the corresponding type of URI object for $os is returned. + +=item $u = URI::file->new_abs( $filename, [$os] ) + +Same as URI::file->new, but makes sure that the URI returned +represents an absolute file name. If the $filename argument is +relative, then the name is resolved relative to the current directory, +i.e. this constructor is really the same as: + + URI::file->new($filename)->abs(URI::file->cwd); + +=item $u = URI::file->cwd + +Returns a I URI that represents the current working directory. +See L. + +=back + +The following methods are supported for I URI (in addition to +the common and generic methods described in L): + +=over 4 + +=item $u->file( [$os] ) + +Returns a file name. It maps from the URI name space +to the file name space of the indicated operating system. + +It might return C if the name can not be represented in the +indicated file system. + +=item $u->dir( [$os] ) + +Some systems use a different form for names of directories than for plain +files. Use this method if you know you want to use the name for +a directory. + +=back + +The C module can be used to map generic file names to names +suitable for the current system. As such, it can work as a nice +replacement for the C module. For instance, the following +code translates the UNIX-style file name F to a name +suitable for the local system: + + $file = URI::file->new("Foo/Bar.pm", "unix")->file; + die "Can't map filename Foo/Bar.pm for $^O" unless defined $file; + open(FILE, $file) || die "Can't open '$file': $!"; + # do something with FILE + +=head1 MAPPING NOTES + +Most computer systems today have hierarchically organized file systems. +Mapping the names used in these systems to the generic URI syntax +allows us to work with relative file URIs that behave as they should +when resolved using the generic algorithm for URIs (specified in RFC +2396). Mapping a file name to the generic URI syntax involves mapping +the path separator character to "/" and encoding any reserved +characters that appear in the path segments of the file name. If +path segments consisting of the strings "." or ".." have a +different meaning than what is specified for generic URIs, then these +must be encoded as well. + +If the file system has device, volume or drive specifications as +the root of the name space, then it makes sense to map them to the +authority field of the generic URI syntax. This makes sure that +relative URIs can not be resolved "above" them, i.e. generally how +relative file names work in those systems. + +Another common use of the authority field is to encode the host on which +this file name is valid. The host name "localhost" is special and +generally has the same meaning as a missing or empty authority +field. This use is in conflict with using it as a device +specification, but can often be resolved for device specifications +having characters not legal in plain host names. + +File name to URI mapping in normally not one-to-one. There are +usually many URIs that map to any given file name. For instance, an +authority of "localhost" maps the same as a URI with a missing or empty +authority. + +Example 1: The Mac classic (Mac OS 9 and earlier) used ":" as path separator, +but not in the same way as a generic URI. ":foo" was a relative name. "foo:bar" +was an absolute name. Also, path segments could contain the "/" character as well +as the literal "." or "..". So the mapping looks like this: + + Mac classic URI + ---------- ------------------- + :foo:bar <==> foo/bar + : <==> ./ + ::foo:bar <==> ../foo/bar + ::: <==> ../../ + foo:bar <==> file:/foo/bar + foo:bar: <==> file:/foo/bar/ + .. <==> %2E%2E + <== / + foo/ <== file:/foo%2F + ./foo.txt <== file:/.%2Ffoo.txt + +Note that if you want a relative URL, you *must* begin the path with a :. Any +path that begins with [^:] is treated as absolute. + +Example 2: The UNIX file system is easy to map, as it uses the same path +separator as URIs, has a single root, and segments of "." and ".." +have the same meaning. URIs that have the character "\0" or "/" as +part of any path segment can not be turned into valid UNIX file names. + + UNIX URI + ---------- ------------------ + foo/bar <==> foo/bar + /foo/bar <==> file:/foo/bar + /foo/bar <== file://localhost/foo/bar + file: ==> ./file: + <== file:/fo%00/bar + / <==> file:/ + +=cut + + +RFC 1630 + + [...] + + There is clearly a danger of confusion that a link made to a local + file should be followed by someone on a different system, with + unexpected and possibly harmful results. Therefore, the convention + is that even a "file" URL is provided with a host part. This allows + a client on another system to know that it cannot access the file + system, or perhaps to use some other local mechanism to access the + file. + + The special value "localhost" is used in the host field to indicate + that the filename should really be used on whatever host one is. + This for example allows links to be made to files which are + distributed on many machines, or to "your unix local password file" + subject of course to consistency across the users of the data. + + A void host field is equivalent to "localhost". + +=head1 CONFIGURATION VARIABLES + +The following configuration variables influence how the class and its +methods behave: + +=over + +=item %URI::file::OS_CLASS + +This hash maps OS identifiers to implementation classes. You might +want to add or modify this if you want to plug in your own file +handler class. Normally the keys should match the $^O values in use. + +If there is no mapping then the "Unix" implementation is used. + +=item $URI::file::DEFAULT_AUTHORITY + +This determine what "authority" string to include in absolute file +URIs. It defaults to "". If you prefer verbose URIs you might set it +to be "localhost". + +Setting this value to C force behaviour compatible to URI v1.31 +and earlier. In this mode host names in UNC paths and drive letters +are mapped to the authority component on Windows, while we produce +authority-less URIs on Unix. + +=back + + +=head1 SEE ALSO + +L, L, L + +=head1 COPYRIGHT + +Copyright 1995-1998,2004 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/URI/file/Base.pm b/lib/URI/file/Base.pm new file mode 100644 index 0000000..ebf24aa --- /dev/null +++ b/lib/URI/file/Base.pm @@ -0,0 +1,84 @@ +package URI::file::Base; + +use strict; +use warnings; + +use URI::Escape qw(); + +our $VERSION = "1.69"; + +sub new +{ + my $class = shift; + my $path = shift; + $path = "" unless defined $path; + + my($auth, $escaped_auth, $escaped_path); + + ($auth, $escaped_auth) = $class->_file_extract_authority($path); + ($path, $escaped_path) = $class->_file_extract_path($path); + + if (defined $auth) { + $auth =~ s,%,%25,g unless $escaped_auth; + $auth =~ s,([/?\#]), URI::Escape::escape_char($1),eg; + $auth = "//$auth"; + if (defined $path) { + $path = "/$path" unless substr($path, 0, 1) eq "/"; + } else { + $path = ""; + } + } else { + return undef unless defined $path; + $auth = ""; + } + + $path =~ s,([%;?]), URI::Escape::escape_char($1),eg unless $escaped_path; + $path =~ s/\#/%23/g; + + my $uri = $auth . $path; + $uri = "file:$uri" if substr($uri, 0, 1) eq "/"; + + URI->new($uri, "file"); +} + +sub _file_extract_authority +{ + my($class, $path) = @_; + return undef unless $class->_file_is_absolute($path); + return $URI::file::DEFAULT_AUTHORITY; +} + +sub _file_extract_path +{ + return undef; +} + +sub _file_is_absolute +{ + return 0; +} + +sub _file_is_localhost +{ + shift; # class + my $host = lc(shift); + return 1 if $host eq "localhost"; + eval { + require Net::Domain; + lc(Net::Domain::hostfqdn()) eq $host || + lc(Net::Domain::hostname()) eq $host; + }; +} + +sub file +{ + undef; +} + +sub dir +{ + my $self = shift; + $self->file(@_); +} + +1; diff --git a/lib/URI/file/FAT.pm b/lib/URI/file/FAT.pm new file mode 100644 index 0000000..2d78275 --- /dev/null +++ b/lib/URI/file/FAT.pm @@ -0,0 +1,27 @@ +package URI::file::FAT; + +use strict; +use warnings; + +use parent 'URI::file::Win32'; + +our $VERSION = "1.69"; + +sub fix_path +{ + shift; # class + for (@_) { + # turn it into 8.3 names + my @p = map uc, split(/\./, $_, -1); + return if @p > 2; # more than 1 dot is not allowed + @p = ("") unless @p; # split bug? (returns nothing when splitting "") + $_ = substr($p[0], 0, 8); + if (@p > 1) { + my $ext = substr($p[1], 0, 3); + $_ .= ".$ext" if length $ext; + } + } + 1; # ok +} + +1; diff --git a/lib/URI/file/Mac.pm b/lib/URI/file/Mac.pm new file mode 100644 index 0000000..4d8e766 --- /dev/null +++ b/lib/URI/file/Mac.pm @@ -0,0 +1,121 @@ +package URI::file::Mac; + +use strict; +use warnings; + +use parent 'URI::file::Base'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _file_extract_path +{ + my $class = shift; + my $path = shift; + + my @pre; + if ($path =~ s/^(:+)//) { + if (length($1) == 1) { + @pre = (".") unless length($path); + } else { + @pre = ("..") x (length($1) - 1); + } + } else { #absolute + $pre[0] = ""; + } + + my $isdir = ($path =~ s/:$//); + $path =~ s,([%/;]), URI::Escape::escape_char($1),eg; + + my @path = split(/:/, $path, -1); + for (@path) { + if ($_ eq "." || $_ eq "..") { + $_ = "%2E" x length($_); + } + $_ = ".." unless length($_); + } + push (@path,"") if $isdir; + (join("/", @pre, @path), 1); +} + + +sub file +{ + my $class = shift; + my $uri = shift; + my @path; + + my $auth = $uri->authority; + if (defined $auth) { + if (lc($auth) ne "localhost" && $auth ne "") { + my $u_auth = uri_unescape($auth); + if (!$class->_file_is_localhost($u_auth)) { + # some other host (use it as volume name) + @path = ("", $auth); + # XXX or just return to make it illegal; + } + } + } + my @ps = split("/", $uri->path, -1); + shift @ps if @path; + push(@path, @ps); + + my $pre = ""; + if (!@path) { + return; # empty path; XXX return ":" instead? + } elsif ($path[0] eq "") { + # absolute + shift(@path); + if (@path == 1) { + return if $path[0] eq ""; # not root directory + push(@path, ""); # volume only, effectively append ":" + } + @ps = @path; + @path = (); + my $part; + for (@ps) { #fix up "." and "..", including interior, in relatives + next if $_ eq "."; + $part = $_ eq ".." ? "" : $_; + push(@path,$part); + } + if ($ps[-1] eq "..") { #if this happens, we need another : + push(@path,""); + } + + } else { + $pre = ":"; + @ps = @path; + @path = (); + my $part; + for (@ps) { #fix up "." and "..", including interior, in relatives + next if $_ eq "."; + $part = $_ eq ".." ? "" : $_; + push(@path,$part); + } + if ($ps[-1] eq "..") { #if this happens, we need another : + push(@path,""); + } + + } + return unless $pre || @path; + for (@path) { + s/;.*//; # get rid of parameters + #return unless length; # XXX + $_ = uri_unescape($_); + return if /\0/; + return if /:/; # Should we? + } + $pre . join(":", @path); +} + +sub dir +{ + my $class = shift; + my $path = $class->file(@_); + return unless defined $path; + $path .= ":" unless $path =~ /:$/; + $path; +} + +1; diff --git a/lib/URI/file/OS2.pm b/lib/URI/file/OS2.pm new file mode 100644 index 0000000..f451f9e --- /dev/null +++ b/lib/URI/file/OS2.pm @@ -0,0 +1,32 @@ +package URI::file::OS2; + +use strict; +use warnings; + +use parent 'URI::file::Win32'; + +our $VERSION = "1.69"; + +# The Win32 version translates k:/foo to file://k:/foo (?!) +# We add an empty host + +sub _file_extract_authority +{ + my $class = shift; + return $1 if $_[0] =~ s,^\\\\([^\\]+),,; # UNC + return $1 if $_[0] =~ s,^//([^/]+),,; # UNC too? + + if ($_[0] =~ m#^[a-zA-Z]{1,2}:#) { # allow for ab: drives + return ""; + } + return; +} + +sub file { + my $p = &URI::file::Win32::file; + return unless defined $p; + $p =~ s,\\,/,g; + $p; +} + +1; diff --git a/lib/URI/file/QNX.pm b/lib/URI/file/QNX.pm new file mode 100644 index 0000000..1d725e4 --- /dev/null +++ b/lib/URI/file/QNX.pm @@ -0,0 +1,20 @@ +package URI::file::QNX; + +use strict; +use warnings; + +use parent 'URI::file::Unix'; + +our $VERSION = "1.69"; + +sub _file_extract_path +{ + my($class, $path) = @_; + # tidy path + $path =~ s,(.)//+,$1/,g; # ^// is correct + $path =~ s,(/\.)+/,/,g; + $path = "./$path" if $path =~ m,^[^:/]+:,,; # look like "scheme:" + $path; +} + +1; diff --git a/lib/URI/file/Unix.pm b/lib/URI/file/Unix.pm new file mode 100644 index 0000000..b06acc7 --- /dev/null +++ b/lib/URI/file/Unix.pm @@ -0,0 +1,58 @@ +package URI::file::Unix; + +use strict; +use warnings; + +use parent 'URI::file::Base'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _file_extract_path +{ + my($class, $path) = @_; + + # tidy path + $path =~ s,//+,/,g; + $path =~ s,(/\.)+/,/,g; + $path = "./$path" if $path =~ m,^[^:/]+:,,; # look like "scheme:" + + return $path; +} + +sub _file_is_absolute { + my($class, $path) = @_; + return $path =~ m,^/,; +} + +sub file +{ + my $class = shift; + my $uri = shift; + my @path; + + my $auth = $uri->authority; + if (defined($auth)) { + if (lc($auth) ne "localhost" && $auth ne "") { + $auth = uri_unescape($auth); + unless ($class->_file_is_localhost($auth)) { + push(@path, "", "", $auth); + } + } + } + + my @ps = $uri->path_segments; + shift @ps if @path; + push(@path, @ps); + + for (@path) { + # Unix file/directory names are not allowed to contain '\0' or '/' + return undef if /\0/; + return undef if /\//; # should we really? + } + + return join("/", @path); +} + +1; diff --git a/lib/URI/file/Win32.pm b/lib/URI/file/Win32.pm new file mode 100644 index 0000000..f00b9c9 --- /dev/null +++ b/lib/URI/file/Win32.pm @@ -0,0 +1,87 @@ +package URI::file::Win32; + +use strict; +use warnings; + +use parent 'URI::file::Base'; + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub _file_extract_authority +{ + my $class = shift; + + return $class->SUPER::_file_extract_authority($_[0]) + if defined $URI::file::DEFAULT_AUTHORITY; + + return $1 if $_[0] =~ s,^\\\\([^\\]+),,; # UNC + return $1 if $_[0] =~ s,^//([^/]+),,; # UNC too? + + if ($_[0] =~ s,^([a-zA-Z]:),,) { + my $auth = $1; + $auth .= "relative" if $_[0] !~ m,^[\\/],; + return $auth; + } + return undef; +} + +sub _file_extract_path +{ + my($class, $path) = @_; + $path =~ s,\\,/,g; + #$path =~ s,//+,/,g; + $path =~ s,(/\.)+/,/,g; + + if (defined $URI::file::DEFAULT_AUTHORITY) { + $path =~ s,^([a-zA-Z]:),/$1,; + } + + return $path; +} + +sub _file_is_absolute { + my($class, $path) = @_; + return $path =~ m,^[a-zA-Z]:, || $path =~ m,^[/\\],; +} + +sub file +{ + my $class = shift; + my $uri = shift; + my $auth = $uri->authority; + my $rel; # is filename relative to drive specified in authority + if (defined $auth) { + $auth = uri_unescape($auth); + if ($auth =~ /^([a-zA-Z])[:|](relative)?/) { + $auth = uc($1) . ":"; + $rel++ if $2; + } elsif (lc($auth) eq "localhost") { + $auth = ""; + } elsif (length $auth) { + $auth = "\\\\" . $auth; # UNC + } + } else { + $auth = ""; + } + + my @path = $uri->path_segments; + for (@path) { + return undef if /\0/; + return undef if /\//; + #return undef if /\\/; # URLs with "\" is not uncommon + } + return undef unless $class->fix_path(@path); + + my $path = join("\\", @path); + $path =~ s/^\\// if $rel; + $path = $auth . $path; + $path =~ s,^\\([a-zA-Z])[:|],\u$1:,; + + return $path; +} + +sub fix_path { 1; } + +1; diff --git a/lib/URI/ftp.pm b/lib/URI/ftp.pm new file mode 100644 index 0000000..7d6848f --- /dev/null +++ b/lib/URI/ftp.pm @@ -0,0 +1,46 @@ +package URI::ftp; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_server URI::_userpass); + +sub default_port { 21 } + +sub path { shift->path_query(@_) } # XXX + +sub _user { shift->SUPER::user(@_); } +sub _password { shift->SUPER::password(@_); } + +sub user +{ + my $self = shift; + my $user = $self->_user(@_); + $user = "anonymous" unless defined $user; + $user; +} + +sub password +{ + my $self = shift; + my $pass = $self->_password(@_); + unless (defined $pass) { + my $user = $self->user; + if ($user eq 'anonymous' || $user eq 'ftp') { + # anonymous ftp login password + # If there is no ftp anonymous password specified + # then we'll just use 'anonymous@' + # We don't try to send the read e-mail address because: + # - We want to remain anonymous + # - We want to stop SPAM + # - We don't want to let ftp sites to discriminate by the user, + # host, country or ftp client being used. + $pass = 'anonymous@'; + } + } + $pass; +} + +1; diff --git a/lib/URI/gopher.pm b/lib/URI/gopher.pm new file mode 100644 index 0000000..d9f7eb5 --- /dev/null +++ b/lib/URI/gopher.pm @@ -0,0 +1,97 @@ +package URI::gopher; # , Dec 4, 1996 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +use URI::Escape qw(uri_unescape); + +# A Gopher URL follows the common internet scheme syntax as defined in +# section 4.3 of [RFC-URL-SYNTAX]: +# +# gopher://[:]/ +# +# where +# +# := | +# %09 | +# %09%09 +# +# := '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' +# '8' | '9' | '+' | 'I' | 'g' | 'T' +# +# := *pchar Refer to RFC 1808 [4] +# := *pchar +# := *uchar Refer to RFC 1738 [3] +# +# If the optional port is omitted, the port defaults to 70. + +sub default_port { 70 } + +sub _gopher_type +{ + my $self = shift; + my $path = $self->path_query; + $path =~ s,^/,,; + my $gtype = $1 if $path =~ s/^(.)//s; + if (@_) { + my $new_type = shift; + if (defined($new_type)) { + Carp::croak("Bad gopher type '$new_type'") + unless length($new_type) == 1; + substr($path, 0, 0) = $new_type; + $self->path_query($path); + } else { + Carp::croak("Can't delete gopher type when selector is present") + if length($path); + $self->path_query(undef); + } + } + return $gtype; +} + +sub gopher_type +{ + my $self = shift; + my $gtype = $self->_gopher_type(@_); + $gtype = "1" unless defined $gtype; + $gtype; +} + +sub gtype { goto &gopher_type } # URI::URL compatibility + +sub selector { shift->_gfield(0, @_) } +sub search { shift->_gfield(1, @_) } +sub string { shift->_gfield(2, @_) } + +sub _gfield +{ + my $self = shift; + my $fno = shift; + my $path = $self->path_query; + + # not according to spec., but many popular browsers accept + # gopher URLs with a '?' before the search string. + $path =~ s/\?/\t/; + $path = uri_unescape($path); + $path =~ s,^/,,; + my $gtype = $1 if $path =~ s,^(.),,s; + my @path = split(/\t/, $path, 3); + if (@_) { + # modify + my $new = shift; + $path[$fno] = $new; + pop(@path) while @path && !defined($path[-1]); + for (@path) { $_="" unless defined } + $path = $gtype; + $path = "1" unless defined $path; + $path .= join("\t", @path); + $self->path_query($path); + } + $path[$fno]; +} + +1; diff --git a/lib/URI/http.pm b/lib/URI/http.pm new file mode 100644 index 0000000..a7e921a --- /dev/null +++ b/lib/URI/http.pm @@ -0,0 +1,27 @@ +package URI::http; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +sub default_port { 80 } + +sub canonical +{ + my $self = shift; + my $other = $self->SUPER::canonical; + + my $slash_path = defined($other->authority) && + !length($other->path) && !defined($other->query); + + if ($slash_path) { + $other = $other->clone if $other == $self; + $other->path("/"); + } + $other; +} + +1; diff --git a/lib/URI/https.pm b/lib/URI/https.pm new file mode 100644 index 0000000..e346b30 --- /dev/null +++ b/lib/URI/https.pm @@ -0,0 +1,14 @@ +package URI::https; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::http'; + +sub default_port { 443 } + +sub secure { 1 } + +1; diff --git a/lib/URI/ldap.pm b/lib/URI/ldap.pm new file mode 100644 index 0000000..60e7f2b --- /dev/null +++ b/lib/URI/ldap.pm @@ -0,0 +1,120 @@ +# Copyright (c) 1998 Graham Barr . All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package URI::ldap; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_ldap URI::_server); + +sub default_port { 389 } + +sub _nonldap_canonical { + my $self = shift; + $self->URI::_server::canonical(@_); +} + +1; + +__END__ + +=head1 NAME + +URI::ldap - LDAP Uniform Resource Locators + +=head1 SYNOPSIS + + use URI; + + $uri = URI->new("ldap:$uri_string"); + $dn = $uri->dn; + $filter = $uri->filter; + @attr = $uri->attributes; + $scope = $uri->scope; + %extn = $uri->extensions; + + $uri = URI->new("ldap:"); # start empty + $uri->host("ldap.itd.umich.edu"); + $uri->dn("o=University of Michigan,c=US"); + $uri->attributes(qw(postalAddress)); + $uri->scope('sub'); + $uri->filter('(cn=Babs Jensen)'); + print $uri->as_string,"\n"; + +=head1 DESCRIPTION + +C provides an interface to parse an LDAP URI into its +constituent parts and also to build a URI as described in +RFC 2255. + +=head1 METHODS + +C supports all the generic and server methods defined by +L, plus the following. + +Each of the following methods can be used to set or get the value in +the URI. The values are passed in unescaped form. None of these +return undefined values, but elements without a default can be empty. +If arguments are given, then a new value is set for the given part +of the URI. + +=over 4 + +=item $uri->dn( [$new_dn] ) + +Sets or gets the I part of the URI. The DN +identifies the base object of the LDAP search. + +=item $uri->attributes( [@new_attrs] ) + +Sets or gets the list of attribute names which are +returned by the search. + +=item $uri->scope( [$new_scope] ) + +Sets or gets the scope to be used by the search. The value can be one of +C<"base">, C<"one"> or C<"sub">. If none is given in the URI then the +return value defaults to C<"base">. + +=item $uri->_scope( [$new_scope] ) + +Same as scope(), but does not default to anything. + +=item $uri->filter( [$new_filter] ) + +Sets or gets the filter to be used by the search. If none is given in +the URI then the return value defaults to C<"(objectClass=*)">. + +=item $uri->_filter( [$new_filter] ) + +Same as filter(), but does not default to anything. + +=item $uri->extensions( [$etype => $evalue,...] ) + +Sets or gets the extensions used for the search. The list passed should +be in the form etype1 => evalue1, etype2 => evalue2,... This is also +the form of list that is returned. + +=back + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Graham Barr EFE + +Slightly modified by Gisle Aas to fit into the URI distribution. + +=head1 COPYRIGHT + +Copyright (c) 1998 Graham Barr. All rights reserved. This program is +free software; you can redistribute it and/or modify it under the same +terms as Perl itself. + +=cut diff --git a/lib/URI/ldapi.pm b/lib/URI/ldapi.pm new file mode 100644 index 0000000..12a0b08 --- /dev/null +++ b/lib/URI/ldapi.pm @@ -0,0 +1,29 @@ +package URI::ldapi; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_ldap URI::_generic); + +require URI::Escape; + +sub un_path { + my $self = shift; + my $old = URI::Escape::uri_unescape($self->authority); + if (@_) { + my $p = shift; + $p =~ s/:/%3A/g; + $p =~ s/\@/%40/g; + $self->authority($p); + } + return $old; +} + +sub _nonldap_canonical { + my $self = shift; + $self->URI::_generic::canonical(@_); +} + +1; diff --git a/lib/URI/ldaps.pm b/lib/URI/ldaps.pm new file mode 100644 index 0000000..80a96d3 --- /dev/null +++ b/lib/URI/ldaps.pm @@ -0,0 +1,14 @@ +package URI::ldaps; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::ldap'; + +sub default_port { 636 } + +sub secure { 1 } + +1; diff --git a/lib/URI/mailto.pm b/lib/URI/mailto.pm new file mode 100644 index 0000000..0e94463 --- /dev/null +++ b/lib/URI/mailto.pm @@ -0,0 +1,73 @@ +package URI::mailto; # RFC 2368 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI URI::_query); + +sub to +{ + my $self = shift; + my @old = $self->headers; + if (@_) { + my @new = @old; + # get rid of any other to: fields + for (my $i = 0; $i < @new; $i += 2) { + if (lc($new[$i] || '') eq "to") { + splice(@new, $i, 2); + redo; + } + } + + my $to = shift; + $to = "" unless defined $to; + unshift(@new, "to" => $to); + $self->headers(@new); + } + return unless defined wantarray; + + my @to; + while (@old) { + my $h = shift @old; + my $v = shift @old; + push(@to, $v) if lc($h) eq "to"; + } + join(",", @to); +} + + +sub headers +{ + my $self = shift; + + # The trick is to just treat everything as the query string... + my $opaque = "to=" . $self->opaque; + $opaque =~ s/\?/&/; + + if (@_) { + my @new = @_; + + # strip out any "to" fields + my @to; + for (my $i=0; $i < @new; $i += 2) { + if (lc($new[$i] || '') eq "to") { + push(@to, (splice(@new, $i, 2))[1]); # remove header + redo; + } + } + + my $new = join(",",@to); + $new =~ s/%/%25/g; + $new =~ s/\?/%3F/g; + $self->opaque($new); + $self->query_form(@new) if @new; + } + return unless defined wantarray; + + # I am lazy today... + URI->new("mailto:?$opaque")->query_form; +} + +1; diff --git a/lib/URI/mms.pm b/lib/URI/mms.pm new file mode 100644 index 0000000..c9af387 --- /dev/null +++ b/lib/URI/mms.pm @@ -0,0 +1,12 @@ +package URI::mms; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::http'; + +sub default_port { 1755 } + +1; diff --git a/lib/URI/news.pm b/lib/URI/news.pm new file mode 100644 index 0000000..77e2c18 --- /dev/null +++ b/lib/URI/news.pm @@ -0,0 +1,71 @@ +package URI::news; # draft-gilman-news-url-01 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +use URI::Escape qw(uri_unescape); +use Carp (); + +sub default_port { 119 } + +# newsURL = scheme ":" [ news-server ] [ refbygroup | message ] +# scheme = "news" | "snews" | "nntp" +# news-server = "//" server "/" +# refbygroup = group [ "/" messageno [ "-" messageno ] ] +# message = local-part "@" domain + +sub _group +{ + my $self = shift; + my $old = $self->path; + if (@_) { + my($group,$from,$to) = @_; + if ($group =~ /\@/) { + $group =~ s/^<(.*)>$/$1/; # "<" and ">" should not be part of it + } + $group =~ s,%,%25,g; + $group =~ s,/,%2F,g; + my $path = $group; + if (defined $from) { + $path .= "/$from"; + $path .= "-$to" if defined $to; + } + $self->path($path); + } + + $old =~ s,^/,,; + if ($old !~ /\@/ && $old =~ s,/(.*),, && wantarray) { + my $extra = $1; + return (uri_unescape($old), split(/-/, $extra)); + } + uri_unescape($old); +} + + +sub group +{ + my $self = shift; + if (@_) { + Carp::croak("Group name can't contain '\@'") if $_[0] =~ /\@/; + } + my @old = $self->_group(@_); + return if $old[0] =~ /\@/; + wantarray ? @old : $old[0]; +} + +sub message +{ + my $self = shift; + if (@_) { + Carp::croak("Message must contain '\@'") unless $_[0] =~ /\@/; + } + my $old = $self->_group(@_); + return undef unless $old =~ /\@/; + return $old; +} + +1; diff --git a/lib/URI/nntp.pm b/lib/URI/nntp.pm new file mode 100644 index 0000000..73dc629 --- /dev/null +++ b/lib/URI/nntp.pm @@ -0,0 +1,10 @@ +package URI::nntp; # draft-gilman-news-url-01 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::news'; + +1; diff --git a/lib/URI/pop.pm b/lib/URI/pop.pm new file mode 100644 index 0000000..cfc5e11 --- /dev/null +++ b/lib/URI/pop.pm @@ -0,0 +1,71 @@ +package URI::pop; # RFC 2384 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_server'; + +use URI::Escape qw(uri_unescape); + +sub default_port { 110 } + +#pop://;auth=@: + +sub user +{ + my $self = shift; + my $old = $self->userinfo; + + if (@_) { + my $new_info = $old; + $new_info = "" unless defined $new_info; + $new_info =~ s/^[^;]*//; + + my $new = shift; + if (!defined($new) && !length($new_info)) { + $self->userinfo(undef); + } else { + $new = "" unless defined $new; + $new =~ s/%/%25/g; + $new =~ s/;/%3B/g; + $self->userinfo("$new$new_info"); + } + } + + return undef unless defined $old; + $old =~ s/;.*//; + return uri_unescape($old); +} + +sub auth +{ + my $self = shift; + my $old = $self->userinfo; + + if (@_) { + my $new = $old; + $new = "" unless defined $new; + $new =~ s/(^[^;]*)//; + my $user = $1; + $new =~ s/;auth=[^;]*//i; + + + my $auth = shift; + if (defined $auth) { + $auth =~ s/%/%25/g; + $auth =~ s/;/%3B/g; + $new = ";AUTH=$auth$new"; + } + $self->userinfo("$user$new"); + + } + + return undef unless defined $old; + $old =~ s/^[^;]*//; + return uri_unescape($1) if $old =~ /;auth=(.*)/i; + return; +} + +1; diff --git a/lib/URI/rlogin.pm b/lib/URI/rlogin.pm new file mode 100644 index 0000000..5ed141b --- /dev/null +++ b/lib/URI/rlogin.pm @@ -0,0 +1,12 @@ +package URI::rlogin; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +sub default_port { 513 } + +1; diff --git a/lib/URI/rsync.pm b/lib/URI/rsync.pm new file mode 100644 index 0000000..9cb649a --- /dev/null +++ b/lib/URI/rsync.pm @@ -0,0 +1,14 @@ +package URI::rsync; # http://rsync.samba.org/ + +# rsync://[USER@]HOST[:PORT]/SRC + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent qw(URI::_server URI::_userpass); + +sub default_port { 873 } + +1; diff --git a/lib/URI/rtsp.pm b/lib/URI/rtsp.pm new file mode 100644 index 0000000..fa82efc --- /dev/null +++ b/lib/URI/rtsp.pm @@ -0,0 +1,12 @@ +package URI::rtsp; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::http'; + +sub default_port { 554 } + +1; diff --git a/lib/URI/rtspu.pm b/lib/URI/rtspu.pm new file mode 100644 index 0000000..b91fb80 --- /dev/null +++ b/lib/URI/rtspu.pm @@ -0,0 +1,12 @@ +package URI::rtspu; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::rtsp'; + +sub default_port { 554 } + +1; diff --git a/lib/URI/sftp.pm b/lib/URI/sftp.pm new file mode 100644 index 0000000..03bf9b3 --- /dev/null +++ b/lib/URI/sftp.pm @@ -0,0 +1,10 @@ +package URI::sftp; + +use strict; +use warnings; + +use parent 'URI::ssh'; + +our $VERSION = "1.69"; + +1; diff --git a/lib/URI/sip.pm b/lib/URI/sip.pm new file mode 100644 index 0000000..7b27a9f --- /dev/null +++ b/lib/URI/sip.pm @@ -0,0 +1,85 @@ +# +# Written by Ryan Kereliuk . This file may be +# distributed under the same terms as Perl itself. +# +# The RFC 3261 sip URI is :;?. +# + +package URI::sip; + +use strict; +use warnings; + +use parent qw(URI::_server URI::_userpass); + +use URI::Escape qw(uri_unescape); + +our $VERSION = "1.69"; + +sub default_port { 5060 } + +sub authority +{ + my $self = shift; + $$self =~ m,^($URI::scheme_re:)?([^;?]*)(.*)$,os or die; + my $old = $2; + + if (@_) { + my $auth = shift; + $$self = defined($1) ? $1 : ""; + my $rest = $3; + if (defined $auth) { + $auth =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego; + $$self .= "$auth"; + } + $$self .= $rest; + } + $old; +} + +sub params_form +{ + my $self = shift; + $$self =~ m,^((?:$URI::scheme_re:)?)(?:([^;?]*))?(;[^?]*)?(.*)$,os or die; + my $paramstr = $3; + + if (@_) { + my @args = @_; + $$self = $1 . $2; + my $rest = $4; + my @new; + for (my $i=0; $i < @args; $i += 2) { + push(@new, "$args[$i]=$args[$i+1]"); + } + $paramstr = join(";", @new); + $$self .= ";" . $paramstr . $rest; + } + $paramstr =~ s/^;//o; + return split(/[;=]/, $paramstr); +} + +sub params +{ + my $self = shift; + $$self =~ m,^((?:$URI::scheme_re:)?)(?:([^;?]*))?(;[^?]*)?(.*)$,os or die; + my $paramstr = $3; + + if (@_) { + my $new = shift; + $$self = $1 . $2; + my $rest = $4; + $$self .= $paramstr . $rest; + } + $paramstr =~ s/^;//o; + return $paramstr; +} + +# Inherited methods that make no sense for a SIP URI. +sub path {} +sub path_query {} +sub path_segments {} +sub abs { shift } +sub rel { shift } +sub query_keywords {} + +1; diff --git a/lib/URI/sips.pm b/lib/URI/sips.pm new file mode 100644 index 0000000..767067f --- /dev/null +++ b/lib/URI/sips.pm @@ -0,0 +1,14 @@ +package URI::sips; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::sip'; + +sub default_port { 5061 } + +sub secure { 1 } + +1; diff --git a/lib/URI/snews.pm b/lib/URI/snews.pm new file mode 100644 index 0000000..de1d515 --- /dev/null +++ b/lib/URI/snews.pm @@ -0,0 +1,14 @@ +package URI::snews; # draft-gilman-news-url-01 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::news'; + +sub default_port { 563 } + +sub secure { 1 } + +1; diff --git a/lib/URI/ssh.pm b/lib/URI/ssh.pm new file mode 100644 index 0000000..396c7a7 --- /dev/null +++ b/lib/URI/ssh.pm @@ -0,0 +1,16 @@ +package URI::ssh; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +# ssh://[USER@]HOST[:PORT]/SRC + +sub default_port { 22 } + +sub secure { 1 } + +1; diff --git a/lib/URI/telnet.pm b/lib/URI/telnet.pm new file mode 100644 index 0000000..b86d9aa --- /dev/null +++ b/lib/URI/telnet.pm @@ -0,0 +1,12 @@ +package URI::telnet; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +sub default_port { 23 } + +1; diff --git a/lib/URI/tn3270.pm b/lib/URI/tn3270.pm new file mode 100644 index 0000000..fb60acc --- /dev/null +++ b/lib/URI/tn3270.pm @@ -0,0 +1,12 @@ +package URI::tn3270; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::_login'; + +sub default_port { 23 } + +1; diff --git a/lib/URI/urn.pm b/lib/URI/urn.pm new file mode 100644 index 0000000..fe744f8 --- /dev/null +++ b/lib/URI/urn.pm @@ -0,0 +1,100 @@ +package URI::urn; # RFC 2141 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI'; + +use Carp qw(carp); + +my %implementor; + +sub _init { + my $class = shift; + my $self = $class->SUPER::_init(@_); + my $nid = $self->nid; + + my $impclass = $implementor{$nid}; + return $impclass->_urn_init($self, $nid) if $impclass; + + $impclass = "URI::urn"; + if ($nid =~ /^[A-Za-z\d][A-Za-z\d\-]*\z/) { + my $id = $nid; + # make it a legal perl identifier + $id =~ s/-/_/g; + $id = "_$id" if $id =~ /^\d/; + + $impclass = "URI::urn::$id"; + no strict 'refs'; + unless (@{"${impclass}::ISA"}) { + # Try to load it + eval "require $impclass"; + die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/; + $impclass = "URI::urn" unless @{"${impclass}::ISA"}; + } + } + else { + carp("Illegal namespace identifier '$nid' for URN '$self'") if $^W; + } + $implementor{$nid} = $impclass; + + return $impclass->_urn_init($self, $nid); +} + +sub _urn_init { + my($class, $self, $nid) = @_; + bless $self, $class; +} + +sub _nid { + my $self = shift; + my $opaque = $self->opaque; + if (@_) { + my $v = $opaque; + my $new = shift; + $v =~ s/[^:]*/$new/; + $self->opaque($v); + # XXX possible rebless + } + $opaque =~ s/:.*//s; + return $opaque; +} + +sub nid { # namespace identifier + my $self = shift; + my $nid = $self->_nid(@_); + $nid = lc($nid) if defined($nid); + return $nid; +} + +sub nss { # namespace specific string + my $self = shift; + my $opaque = $self->opaque; + if (@_) { + my $v = $opaque; + my $new = shift; + if (defined $new) { + $v =~ s/(:|\z).*/:$new/; + } + else { + $v =~ s/:.*//s; + } + $self->opaque($v); + } + return undef unless $opaque =~ s/^[^:]*://; + return $opaque; +} + +sub canonical { + my $self = shift; + my $nid = $self->_nid; + my $new = $self->SUPER::canonical; + return $new if $nid !~ /[A-Z]/ || $nid =~ /%/; + $new = $new->clone if $new == $self; + $new->nid(lc($nid)); + return $new; +} + +1; diff --git a/lib/URI/urn/isbn.pm b/lib/URI/urn/isbn.pm new file mode 100644 index 0000000..b335044 --- /dev/null +++ b/lib/URI/urn/isbn.pm @@ -0,0 +1,103 @@ +package URI::urn::isbn; # RFC 3187 + +use strict; +use warnings; + +use parent 'URI::urn'; + +use Carp qw(carp); + +BEGIN { + require Business::ISBN; + + local $^W = 0; # don't warn about dev versions, perl5.004 style + warn "Using Business::ISBN version " . Business::ISBN->VERSION . + " which is deprecated.\nUpgrade to Business::ISBN version 2\n" + if Business::ISBN->VERSION < 2; + } + +sub _isbn { + my $nss = shift; + $nss = $nss->nss if ref($nss); + my $isbn = Business::ISBN->new($nss); + $isbn = undef if $isbn && !$isbn->is_valid; + return $isbn; +} + +sub _nss_isbn { + my $self = shift; + my $nss = $self->nss(@_); + my $isbn = _isbn($nss); + $isbn = $isbn->as_string if $isbn; + return($nss, $isbn); +} + +sub isbn { + my $self = shift; + my $isbn; + (undef, $isbn) = $self->_nss_isbn(@_); + return $isbn; +} + +sub isbn_publisher_code { + my $isbn = shift->_isbn || return undef; + return $isbn->publisher_code; +} + +BEGIN { +my $group_method = do { + local $^W = 0; # don't warn about dev versions, perl5.004 style + Business::ISBN->VERSION >= 2 ? 'group_code' : 'country_code'; + }; + +sub isbn_group_code { + my $isbn = shift->_isbn || return undef; + return $isbn->$group_method; +} +} + +sub isbn_country_code { + my $name = (caller(0))[3]; $name =~ s/.*:://; + carp "$name is DEPRECATED. Use isbn_group_code instead"; + + no strict 'refs'; + &isbn_group_code; +} + +BEGIN { +my $isbn13_method = do { + local $^W = 0; # don't warn about dev versions, perl5.004 style + Business::ISBN->VERSION >= 2 ? 'as_isbn13' : 'as_ean'; + }; + +sub isbn13 { + my $isbn = shift->_isbn || return undef; + + # Business::ISBN 1.x didn't put hyphens in the EAN, and it was just a string + # Business::ISBN 2.0 doesn't do EAN, but it does ISBN-13 objects + # and it uses the hyphens, so call as_string with an empty anon array + # or, adjust the test and features to say that it comes out with hyphens. + my $thingy = $isbn->$isbn13_method; + return eval { $thingy->can( 'as_string' ) } ? $thingy->as_string([]) : $thingy; +} +} + +sub isbn_as_ean { + my $name = (caller(0))[3]; $name =~ s/.*:://; + carp "$name is DEPRECATED. Use isbn13 instead"; + + no strict 'refs'; + &isbn13; +} + +sub canonical { + my $self = shift; + my($nss, $isbn) = $self->_nss_isbn; + my $new = $self->SUPER::canonical; + return $new unless $nss && $isbn && $nss ne $isbn; + $new = $new->clone if $new == $self; + $new->nss($isbn); + return $new; +} + +1; diff --git a/lib/URI/urn/oid.pm b/lib/URI/urn/oid.pm new file mode 100644 index 0000000..ceb8322 --- /dev/null +++ b/lib/URI/urn/oid.pm @@ -0,0 +1,20 @@ +package URI::urn::oid; # RFC 2061 + +use strict; +use warnings; + +our $VERSION = "1.69"; + +use parent 'URI::urn'; + +sub oid { + my $self = shift; + my $old = $self->nss; + if (@_) { + $self->nss(join(".", @_)); + } + return split(/\./, $old) if wantarray; + return $old; +} + +1; diff --git a/t/abs.t b/t/abs.t new file mode 100644 index 0000000..ac79686 --- /dev/null +++ b/t/abs.t @@ -0,0 +1,173 @@ +use strict; +use warnings; + +print "1..45\n"; + +# This test the resolution of abs path for all examples given +# in the "Uniform Resource Identifiers (URI): Generic Syntax" document. + +use URI; +my $base = "http://a/b/c/d;p?q"; +my $testno = 1; +my @rel_fail; + +while () { + #next if 1 .. /^C\.\s+/; + #last if /^D\.\s+/; + next unless /\s+(\S+)\s*=\s*(.*)/; + my $uref = $1; + my $expect = $2; + $expect =~ s/\(current document\)/$base/; + #print "$uref => $expect\n"; + + my $bad; + my $u = URI->new($uref, $base); + if ($u->abs($base)->as_string ne $expect) { + $bad++; + my $abs = $u->abs($base)->as_string; + print qq(URI->new("$uref")->abs("$base") ==> "$abs"\n); + } + + # Let's test another version of the same thing + $u = URI->new($uref); + my $b = URI->new($base); + if ($u->abs($b,1) ne $expect && $uref !~ /^http:/) { + $bad++; + print qq(URI->new("$uref")->abs(URI->new("$base"), 1)\n); + } + + # Let's try the other way + $u = URI->new($expect)->rel($base)->as_string; + if ($u ne $uref) { + push(@rel_fail, qq($testno: URI->new("$expect", "$base")->rel ==> "$u" (not "$uref")\n)); + } + + print "not " if $bad; + print "ok ", $testno++, "\n"; +} + +if (@rel_fail) { + print "\n\nIn the following cases we did not get back to where we started with rel()\n"; + print @rel_fail; +} + + + +__END__ + +Network Working Group T. Berners-Lee, MIT/LCS +INTERNET-DRAFT R. Fielding, U.C. Irvine +draft-fielding-uri-syntax-02 L. Masinter, Xerox Corporation +Expires six months after publication date March 4, 1998 + + + Uniform Resource Identifiers (URI): Generic Syntax + +[...] + +C. Examples of Resolving Relative URI References + + Within an object with a well-defined base URI of + + http://a/b/c/d;p?q + + the relative URIs would be resolved as follows: + +C.1. Normal Examples + + g:h = g:h + g = http://a/b/c/g + ./g = http://a/b/c/g + g/ = http://a/b/c/g/ + /g = http://a/g + //g = http://g + ?y = http://a/b/c/d;p?y + g?y = http://a/b/c/g?y + #s = (current document)#s + g#s = http://a/b/c/g#s + g?y#s = http://a/b/c/g?y#s + ;x = http://a/b/c/;x + g;x = http://a/b/c/g;x + g;x?y#s = http://a/b/c/g;x?y#s + . = http://a/b/c/ + ./ = http://a/b/c/ + .. = http://a/b/ + ../ = http://a/b/ + ../g = http://a/b/g + ../.. = http://a/ + ../../ = http://a/ + ../../g = http://a/g + +C.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur in + normal practice, all URI parsers should be capable of resolving them + consistently. Each example uses the same base as above. + + An empty reference refers to the start of the current document. + + <> = (current document) + + Parsers must be careful in handling the case where there are more + relative path ".." segments than there are hierarchical levels in + the base URI's path. Note that the ".." syntax cannot be used to + change the authority component of a URI. + + ../../../g = http://a/../g + ../../../../g = http://a/../../g + + In practice, some implementations strip leading relative symbolic + elements (".", "..") after applying a relative URI calculation, based + on the theory that compensating for obvious author errors is better + than allowing the request to fail. Thus, the above two references + will be interpreted as "http://a/g" by some implementations. + + Similarly, parsers must avoid treating "." and ".." as special when + they are not complete components of a relative path. + + /./g = http://a/./g + /../g = http://a/../g + g. = http://a/b/c/g. + .g = http://a/b/c/.g + g.. = http://a/b/c/g.. + ..g = http://a/b/c/..g + + Less likely are cases where the relative URI uses unnecessary or + nonsensical forms of the "." and ".." complete path segments. + + ./../g = http://a/b/g + ./g/. = http://a/b/c/g/ + g/./h = http://a/b/c/g/h + g/../h = http://a/b/c/h + g;x=1/./y = http://a/b/c/g;x=1/y + g;x=1/../y = http://a/b/c/y + + All client applications remove the query component from the base URI + before resolving relative URIs. However, some applications fail to + separate the reference's query and/or fragment components from a + relative path before merging it with the base path. This error is + rarely noticed, since typical usage of a fragment never includes the + hierarchy ("/") character, and the query component is not normally + used within relative references. + + g?y/./x = http://a/b/c/g?y/./x + g?y/../x = http://a/b/c/g?y/../x + g#s/./x = http://a/b/c/g#s/./x + g#s/../x = http://a/b/c/g#s/../x + + Some parsers allow the scheme name to be present in a relative URI + if it is the same as the base URI scheme. This is considered to be + a loophole in prior specifications of partial URIs [RFC1630]. Its + use should be avoided. + + http:g = http:g + http: = http: + + +========================================================================== + +Some extra tests for good measure... + + #foo? = (current document)#foo? + ?#foo = http://a/b/c/d;p?#foo + diff --git a/t/clone.t b/t/clone.t new file mode 100644 index 0000000..57201f5 --- /dev/null +++ b/t/clone.t @@ -0,0 +1,21 @@ +use strict; +use warnings; + +print "1..2\n"; + +use URI::URL; + +my $b = URI::URL->new("http://www/"); + +my $u1 = URI::URL->new("foo", $b); +my $u2 = $u1->clone; + +$u1->base("http://yyy/"); + +#use Data::Dump; Data::Dump::dump($b, $u1, $u2); + +print "not " unless $u1->abs->as_string eq "http://yyy/foo"; +print "ok 1\n"; + +print "not " unless $u2->abs->as_string eq "http://www/foo"; +print "ok 2\n"; diff --git a/t/cwd.t b/t/cwd.t new file mode 100644 index 0000000..a890ee5 --- /dev/null +++ b/t/cwd.t @@ -0,0 +1,15 @@ +#!perl -T + +use strict; +use warnings; + +use Test::More; + +plan tests => 1; + +use URI::file; +$ENV{PATH} = "/bin:/usr/bin"; + +my $cwd = eval { URI::file->cwd }; +is($@, '', 'no exceptions'); + diff --git a/t/data.t b/t/data.t new file mode 100644 index 0000000..64920d9 --- /dev/null +++ b/t/data.t @@ -0,0 +1,111 @@ +use strict; +use warnings; + +eval { + require MIME::Base64; +}; +if ($@) { + print "1..0\n"; + print $@; + exit; +} + +print "1..22\n"; + +use URI; + +my $u = URI->new("data:,A%20brief%20note"); +print "not " unless $u->scheme eq "data" && $u->opaque eq ",A%20brief%20note"; +print "ok 1\n"; + +print "not " unless $u->media_type eq "text/plain;charset=US-ASCII" && + $u->data eq "A brief note"; +print "ok 2\n"; + +my $old = $u->data("Får-i-kål er tingen!"); +print "not " unless $old eq "A brief note" && $u eq "data:,F%E5r-i-k%E5l%20er%20tingen!"; +print "ok 3\n"; + +$old = $u->media_type("text/plain;charset=iso-8859-1"); +print "not " unless $old eq "text/plain;charset=US-ASCII" && + $u eq "data:text/plain;charset=iso-8859-1,F%E5r-i-k%E5l%20er%20tingen!"; +print "ok 4\n"; + + +$u = URI->new("data:image/gif;base64,R0lGODdhMAAwAPAAAAAAAP///ywAAAAAMAAwAAAC8IyPqcvt3wCcDkiLc7C0qwyGHhSWpjQu5yqmCYsapyuvUUlvONmOZtfzgFzByTB10QgxOR0TqBQejhRNzOfkVJ+5YiUqrXF5Y5lKh/DeuNcP5yLWGsEbtLiOSpa/TPg7JpJHxyendzWTBfX0cxOnKPjgBzi4diinWGdkF8kjdfnycQZXZeYGejmJlZeGl9i2icVqaNVailT6F5iJ90m6mvuTS4OK05M0vDk0Q4XUtwvKOzrcd3iq9uisF81M1OIcR7lEewwcLp7tuNNkM3uNna3F2JQFo97Vriy/Xl4/f1cf5VWzXyym7PHhhx4dbgYKAAA7"); + +print "not " unless $u->media_type eq "image/gif"; +print "ok 5\n"; + +if ($ENV{DISPLAY} && $ENV{XV}) { + open(XV, "| $ENV{XV} -") || die; + print XV $u->data; + close(XV); +} +print "not " unless length($u->data) == 273; +print "ok 6\n"; + +$u = URI->new("data:text/plain;charset=iso-8859-7,%be%fg%be"); # %fg +print "not " unless $u->data eq "\xBE%fg\xBE"; +print "ok 7\n"; + +$u = URI->new("data:application/vnd-xxx-query,select_vcount,fcol_from_fieldtable/local"); +print "not " unless $u->data eq "select_vcount,fcol_from_fieldtable/local"; +print "ok 8\n"; +$u->data(""); +print "not " unless $u eq "data:application/vnd-xxx-query,"; +print "ok 9\n"; + +$u->data("a,b"); $u->media_type(undef); +print "not " unless $u eq "data:,a,b"; +print "ok 10\n"; + +# Test automatic selection of URI/BASE64 encoding +$u = URI->new("data:"); +$u->data(""); +print "not " unless $u eq "data:,"; +print "ok 11\n"; + +$u->data(">"); +print "not " unless $u eq "data:,%3E" && $u->data eq ">"; +print "ok 12\n"; + +$u->data(">>>>>"); +print "not " unless $u eq "data:,%3E%3E%3E%3E%3E"; +print "ok 13\n"; + +$u->data(">>>>>>"); +print "not " unless $u eq "data:;base64,Pj4+Pj4+"; +print "ok 14\n"; + +$u->media_type("text/plain;foo=bar"); +print "not " unless $u eq "data:text/plain;foo=bar;base64,Pj4+Pj4+"; +print "ok 15\n"; + +$u->media_type("foo"); +print "not " unless $u eq "data:foo;base64,Pj4+Pj4+"; +print "ok 16\n"; + +$u->data(">" x 3000); +print "not " unless $u eq ("data:foo;base64," . ("Pj4+" x 1000)) && + $u->data eq (">" x 3000); +print "ok 17\n"; + +$u->media_type(undef); +$u->data(undef); +print "not " unless $u eq "data:,"; +print "ok 18\n"; + +$u = URI->new("data:foo"); +print "not " unless $u->media_type("bar,båz") eq "foo"; +print "ok 19\n"; + +print "not " unless $u->media_type eq "bar,båz"; +print "ok 20\n"; + +$old = $u->data("new"); +print "not " unless $old eq "" && $u eq "data:bar%2Cb%E5z,new"; +print "ok 21\n"; + +print "not " unless URI->new('data:;base64,%51%6D%70%76%5A%58%4A%75')->data eq "Bjoern"; +print "ok 22\n"; diff --git a/t/distmanifest.t b/t/distmanifest.t new file mode 100644 index 0000000..c2812f7 --- /dev/null +++ b/t/distmanifest.t @@ -0,0 +1,11 @@ +use strict; +use warnings; + +use Test::More; +BEGIN { + plan skip_all => 'these tests are for authors only!' + unless -d '.git' || $ENV{AUTHOR_TESTING}; +} + +use Test::DistManifest; +manifest_ok(); diff --git a/t/escape-char.t b/t/escape-char.t new file mode 100644 index 0000000..b03e43d --- /dev/null +++ b/t/escape-char.t @@ -0,0 +1,29 @@ +use strict; +use warnings; + +# see https://rt.cpan.org/Ticket/Display.html?id=96941 + +use Test::More; +use URI; + +TODO: { + my $str = "http://foo/\xE9"; + utf8::upgrade($str); + my $uri = URI->new($str); + + local $TODO = 'URI::Escape::escape_char misunderstands utf8'; + + # http://foo/%C3%A9 + is("$uri", 'http://foo/%E9', 'correctly created a URI from a utf8-upgraded string'); +} + +{ + my $str = "http://foo/\xE9"; + utf8::downgrade($str); + my $uri = URI->new($str); + + # http://foo/%E9 + is("$uri", 'http://foo/%E9', 'correctly created a URI from a utf8-downgrade string'); +} + +done_testing; diff --git a/t/escape.t b/t/escape.t new file mode 100644 index 0000000..05b8022 --- /dev/null +++ b/t/escape.t @@ -0,0 +1,37 @@ +use strict; +use warnings; + +use Test::More tests => 12; + +use URI::Escape; + +is uri_escape("|abcå"), "%7Cabc%E5"; + +is uri_escape("abc", "b-d"), "a%62%63"; + +# New escapes in RFC 3986 +is uri_escape("~*'()"), "~%2A%27%28%29"; +is uri_escape("<\">"), "%3C%22%3E"; + +is uri_escape(undef), undef; + +is uri_unescape("%7Cabc%e5"), "|abcå"; + +is_deeply [uri_unescape("%40A%42", "CDE", "F%47H")], [qw(@AB CDE FGH)]; + + +use URI::Escape qw(%escapes); + +is $escapes{"%"}, "%25"; + + +use URI::Escape qw(uri_escape_utf8); + +is uri_escape_utf8("|abcå"), "%7Cabc%C3%A5"; + +skip "Perl 5.8.0 or higher required", 3 if $] < 5.008; + +ok !eval { print uri_escape("abc" . chr(300)); 1 }; +like $@, qr/^Can\'t escape \\x\{012C\}, try uri_escape_utf8\(\) instead/; + +is uri_escape_utf8(chr(0xFFF)), "%E0%BF%BF"; diff --git a/t/file.t b/t/file.t new file mode 100644 index 0000000..26e0119 --- /dev/null +++ b/t/file.t @@ -0,0 +1,65 @@ +#!perl -T + +use strict; +use warnings; + +use URI::file; + +my @tests = ( +[ "file", "unix", "win32", "mac" ], +#---------------- ------------ --------------- -------------- +[ "file://localhost/foo/bar", + "!/foo/bar", "!\\foo\\bar", "!foo:bar", ], +[ "file:///foo/bar", + "/foo/bar", "\\foo\\bar", "!foo:bar", ], +[ "file:/foo/bar", "!/foo/bar", "!\\foo\\bar", "foo:bar", ], +[ "foo/bar", "foo/bar", "foo\\bar", ":foo:bar",], +[ "file://foo3445x/bar","!//foo3445x/bar", "!\\\\foo3445x\\bar", "!foo3445x:bar"], +[ "file://a:/", "!//a:/", "!A:\\", undef], +[ "file:///A:/", "/A:/", "A:\\", undef], +[ "file:///", "/", "\\", undef], +[ ".", ".", ".", ":"], +[ "..", "..", "..", "::"], +[ "%2E", "!.", "!.", ":."], +[ "../%2E%2E", "!../..", "!..\\..", "::.."], +); + +my @os = @{shift @tests}; +shift @os; # file + +my $num = @tests; +print "1..$num\n"; + +my $testno = 1; + +for my $t (@tests) { + my @t = @$t; + my $file = shift @t; + my $err; + + my $u = URI->new($file, "file"); + my $i = 0; + for my $os (@os) { + my $f = $u->file($os); + my $expect = $t[$i]; + $f = "" unless defined $f; + $expect = "" unless defined $expect; + my $loose; + $loose++ if $expect =~ s/^!//; + if ($expect ne $f) { + print "URI->new('$file', 'file')->file('$os') ne $expect, but $f\n"; + $err++; + } + if (defined($t[$i]) && !$loose) { + my $u2 = URI::file->new($t[$i], $os); + unless ($u2->as_string eq $file) { + print "URI::file->new('$t[$i]', '$os') ne $file, but $u2\n"; + $err++; + } + } + $i++; + } + print "not " if $err; + print "ok $testno\n"; + $testno++; +} diff --git a/t/ftp.t b/t/ftp.t new file mode 100644 index 0000000..9340885 --- /dev/null +++ b/t/ftp.t @@ -0,0 +1,53 @@ +use strict; +use warnings; + +print "1..13\n"; + +use URI; +my $uri; + +$uri = URI->new("ftp://ftp.example.com/path"); + +print "not " unless $uri->scheme eq "ftp"; +print "ok 1\n"; + +print "not " unless $uri->host eq "ftp.example.com"; +print "ok 2\n"; + +print "not " unless $uri->port eq 21; +print "ok 3\n"; + +print "not " unless $uri->user eq "anonymous"; +print "ok 4\n"; + +print "not " unless $uri->password eq 'anonymous@'; +print "ok 5\n"; + +$uri->userinfo("gisle\@aas.no"); + +print "not " unless $uri eq "ftp://gisle%40aas.no\@ftp.example.com/path"; +print "ok 6\n"; + +print "not " unless $uri->user eq "gisle\@aas.no"; +print "ok 7\n"; + +print "not " if defined($uri->password); +print "ok 8\n"; + +$uri->password("secret"); + +print "not " unless $uri eq "ftp://gisle%40aas.no:secret\@ftp.example.com/path"; +print "ok 9\n"; + +$uri = URI->new("ftp://gisle\@aas.no:secret\@ftp.example.com/path"); +print "not " unless $uri eq "ftp://gisle\@aas.no:secret\@ftp.example.com/path"; +print "ok 10\n"; + +print "not " unless $uri->userinfo eq "gisle\@aas.no:secret"; +print "ok 11\n"; + +print "not " unless $uri->user eq "gisle\@aas.no"; +print "ok 12\n"; + +print "not " unless $uri->password eq "secret"; +print "ok 13\n"; diff --git a/t/generic.t b/t/generic.t new file mode 100644 index 0000000..e2f7b97 --- /dev/null +++ b/t/generic.t @@ -0,0 +1,219 @@ +use strict; +use warnings; + +print "1..48\n"; + +use URI; + +my $foo = URI->new("Foo:opaque#frag"); + +print "not " unless ref($foo) eq "URI::_foreign"; +print "ok 1\n"; + +print "not " unless $foo->as_string eq "Foo:opaque#frag"; +print "ok 2\n"; + +print "not " unless "$foo" eq "Foo:opaque#frag"; +print "ok 3\n"; + +# Try accessors +print "not " unless $foo->_scheme eq "Foo" && $foo->scheme eq "foo" && !$foo->has_recognized_scheme; +print "ok 4\n"; + +print "not " unless $foo->opaque eq "opaque"; +print "ok 5\n"; + +print "not " unless $foo->fragment eq "frag"; +print "ok 6\n"; + +print "not " unless $foo->canonical eq "foo:opaque#frag"; +print "ok 7\n"; + +# Try modificators +my $old = $foo->scheme("bar"); + +print "not " unless $old eq "foo" && $foo eq "bar:opaque#frag"; +print "ok 8\n"; + +$old = $foo->scheme(""); +print "not " unless $old eq "bar" && $foo eq "opaque#frag"; +print "ok 9\n"; + +$old = $foo->scheme("foo"); +$old = $foo->scheme(undef); + +print "not " unless $old eq "foo" && $foo eq "opaque#frag"; +print "ok 10\n"; + +$foo->scheme("foo"); + + +$old = $foo->opaque("xxx"); +print "not " unless $old eq "opaque" && $foo eq "foo:xxx#frag"; +print "ok 11\n"; + +$old = $foo->opaque(""); +print "not " unless $old eq "xxx" && $foo eq "foo:#frag"; +print "ok 12\n"; + +$old = $foo->opaque(" #?/"); +$old = $foo->opaque(undef); +print "not " unless $old eq "%20%23?/" && $foo eq "foo:#frag"; +print "ok 13\n"; + +$foo->opaque("opaque"); + + +$old = $foo->fragment("x"); +print "not " unless $old eq "frag" && $foo eq "foo:opaque#x"; +print "ok 14\n"; + +$old = $foo->fragment(""); +print "not " unless $old eq "x" && $foo eq "foo:opaque#"; +print "ok 15\n"; + +$old = $foo->fragment(undef); +print "not " unless $old eq "" && $foo eq "foo:opaque"; +print "ok 16\n"; + + +# Compare +print "not " unless $foo->eq("Foo:opaque") && + $foo->eq(URI->new("FOO:opaque")) && + $foo->eq("foo:opaque"); +print "ok 17\n"; + +print "not " if $foo->eq("Bar:opaque") || + $foo->eq("foo:opaque#"); +print "ok 18\n"; + + +# Try hierarchal unknown URLs + +$foo = URI->new("foo://host:80/path?query#frag"); + +print "not " unless "$foo" eq "foo://host:80/path?query#frag"; +print "ok 19\n"; + +# Accessors +print "not " unless $foo->scheme eq "foo"; +print "ok 20\n"; + +print "not " unless $foo->authority eq "host:80"; +print "ok 21\n"; + +print "not " unless $foo->path eq "/path"; +print "ok 22\n"; + +print "not " unless $foo->query eq "query"; +print "ok 23\n"; + +print "not " unless $foo->fragment eq "frag"; +print "ok 24\n"; + +# Modificators +$old = $foo->authority("xxx"); +print "not " unless $old eq "host:80" && $foo eq "foo://xxx/path?query#frag"; +print "ok 25\n"; + +$old = $foo->authority(""); +print "not " unless $old eq "xxx" && $foo eq "foo:///path?query#frag"; +print "ok 26\n"; + +$old = $foo->authority(undef); +print "not " unless $old eq "" && $foo eq "foo:/path?query#frag"; +print "ok 27\n"; + +$old = $foo->authority("/? #;@&"); +print "not " unless !defined($old) && $foo eq "foo://%2F%3F%20%23;@&/path?query#frag"; +print "ok 28\n"; + +$old = $foo->authority("host:80"); +print "not " unless $old eq "%2F%3F%20%23;@&" && $foo eq "foo://host:80/path?query#frag"; +print "ok 29\n"; + + +$old = $foo->path("/foo"); +print "not " unless $old eq "/path" && $foo eq "foo://host:80/foo?query#frag"; +print "ok 30\n"; + +$old = $foo->path("bar"); +print "not " unless $old eq "/foo" && $foo eq "foo://host:80/bar?query#frag"; +print "ok 31\n"; + +$old = $foo->path(""); +print "not " unless $old eq "/bar" && $foo eq "foo://host:80?query#frag"; +print "ok 32\n"; + +$old = $foo->path(undef); +print "not " unless $old eq "" && $foo eq "foo://host:80?query#frag"; +print "ok 33\n"; + +$old = $foo->path("@;/?#"); +print "not " unless $old eq "" && $foo eq "foo://host:80/@;/%3F%23?query#frag"; +print "ok 34\n"; + +$old = $foo->path("path"); +print "not " unless $old eq "/@;/%3F%23" && $foo eq "foo://host:80/path?query#frag"; +print "ok 35\n"; + + +$old = $foo->query("foo"); +print "not " unless $old eq "query" && $foo eq "foo://host:80/path?foo#frag"; +print "ok 36\n"; + +$old = $foo->query(""); +print "not " unless $old eq "foo" && $foo eq "foo://host:80/path?#frag"; +print "ok 37\n"; + +$old = $foo->query(undef); +print "not " unless $old eq "" && $foo eq "foo://host:80/path#frag"; +print "ok 38\n"; + +$old = $foo->query("/?&=# "); +print "not " unless !defined($old) && $foo eq "foo://host:80/path?/?&=%23%20#frag"; +print "ok 39\n"; + +$old = $foo->query("query"); +print "not " unless $old eq "/?&=%23%20" && $foo eq "foo://host:80/path?query#frag"; +print "ok 40\n"; + +# Some buildup trics +$foo = URI->new(""); +$foo->path("path"); +$foo->authority("auth"); + +print "not " unless $foo eq "//auth/path"; +print "ok 41\n"; + +$foo = URI->new("", "http:"); +$foo->query("query"); +$foo->authority("auth"); +print "not " unless $foo eq "//auth?query" && $foo->has_recognized_scheme; +print "ok 42\n"; + +$foo->path("path"); +print "not " unless $foo eq "//auth/path?query"; +print "ok 43\n"; + +$foo = URI->new(""); +$old = $foo->path("foo"); +print "not " unless $old eq "" && $foo eq "foo" && !$foo->has_recognized_scheme; +print "ok 44\n"; + +$old = $foo->path("bar"); +print "not " unless $old eq "foo" && $foo eq "bar"; +print "ok 45\n"; + +$old = $foo->opaque("foo"); +print "not " unless $old eq "bar" && $foo eq "foo"; +print "ok 46\n"; + +$old = $foo->path(""); +print "not " unless $old eq "foo" && $foo eq ""; +print "ok 47\n"; + +$old = $foo->query("q"); +print "not " unless !defined($old) && $foo eq "?q"; +print "ok 48\n"; + diff --git a/t/gopher.t b/t/gopher.t new file mode 100644 index 0000000..427a5fc --- /dev/null +++ b/t/gopher.t @@ -0,0 +1,46 @@ +use strict; +use warnings; + +print "1..48\n"; + +use URI; + +my $t = 1; +sub is { + my ($exp, $got) = @_; + if (!defined $exp) { + print "not " if defined $got; + } + else { + print "not " unless $got eq $exp; + } + print "ok " . ($t++) . "\n"; +} + +sub check_gopher_uri { + my ($u, $exphost, $expport, $exptype, $expselector, $expsearch) = @_; + is("gopher", $u->scheme); + is($exphost, $u->host); + is($expport, $u->port); + is($exptype, $u->gopher_type); + is($expselector, $u->selector); + is($expsearch, $u->search); +} + +my $u; +$u = URI->new("gopher://host"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70/"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70/1"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:70/1"); +check_gopher_uri($u, "host", 70, 1); +$u = URI->new("gopher://host:123/7foo"); +check_gopher_uri($u, "host", 123, 7, "foo"); +$u = URI->new("gopher://host/7foo\tbar%20baz"); +check_gopher_uri($u, "host", 70, 7, "foo", "bar baz"); +$u = URI->new("gopher://host/7foo%09bar%20baz"); +check_gopher_uri($u, "host", 70, 7, "foo", "bar baz"); diff --git a/t/heuristic.t b/t/heuristic.t new file mode 100644 index 0000000..63c2ad8 --- /dev/null +++ b/t/heuristic.t @@ -0,0 +1,138 @@ +use strict; +use warnings; + +BEGIN { + # mock up a gethostbyname that always works :-) + *CORE::GLOBAL::gethostbyname = sub { + my $name = shift; + #print "# gethostbyname [$name]\n"; + die if wantarray; + return 1 if $name =~ /^www\.perl\.(com|org|ca|su)\.$/; + return 1 if $name eq "www.perl.co.uk\."; + return 0; + }; +} + +print "1..26\n"; + +use URI::Heuristic qw(uf_urlstr uf_url); +if (shift) { + $URI::Heuristic::DEBUG++; + open(STDERR, ">&STDOUT"); # redirect STDERR +} + +print "not " unless uf_urlstr("http://www.sn.no/") eq "http://www.sn.no/"; +print "ok 1\n"; + +if ($^O eq "MacOS") { + print "not " unless uf_urlstr("etc:passwd") eq "file:/etc/passwd"; +} else { +print "not " unless uf_urlstr("/etc/passwd") eq "file:/etc/passwd"; +} +print "ok 2\n"; + +if ($^O eq "MacOS") { + print "not " unless uf_urlstr(":foo.txt") eq "file:./foo.txt"; +} else { +print "not " unless uf_urlstr("./foo.txt") eq "file:./foo.txt"; +} +print "ok 3\n"; + +print "not " unless uf_urlstr("ftp.aas.no/lwp.tar.gz") eq "ftp://ftp.aas.no/lwp.tar.gz"; +print "ok 4\n"; + +if($^O eq "MacOS") { +# its a weird, but valid, MacOS path, so it can't be left alone + print "not " unless uf_urlstr("C:\\CONFIG.SYS") eq "file:/C/%5CCONFIG.SYS"; +} else { +print "not " unless uf_urlstr("C:\\CONFIG.SYS") eq "file:C:\\CONFIG.SYS"; +} +print "ok 5\n"; + +{ + local $ENV{LC_ALL} = ""; + local $ENV{LANG} = ""; + local $ENV{HTTP_ACCEPT_LANGUAGE} = ""; + + $ENV{LC_ALL} = "en_GB.UTF-8"; + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(org|co)\.uk/camel\.gif$,; + print "ok 6\n"; + + use Net::Domain; + $ENV{LC_ALL} = "C"; + { no warnings; *Net::Domain::hostfqdn = sub { return 'vasya.su' } } + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.su/camel\.gif$,; + print "ok 7\n"; + + $ENV{LC_ALL} = "C"; + { no warnings; *Net::Domain::hostfqdn = sub { return '' } } + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(com|org)/camel\.gif$,; + print "ok 8\n"; + + $ENV{HTTP_ACCEPT_LANGUAGE} = "en-ca"; + undef $URI::Heuristic::MY_COUNTRY; + print "not " unless uf_urlstr("perl/camel.gif") eq "http://www.perl.ca/camel.gif"; + print "ok 9\n"; +} + +$URI::Heuristic::MY_COUNTRY = "bv"; +print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(com|org)/camel\.gif$,; +print "ok 10\n"; + +# Backwards compatibility; uk != United Kingdom in ISO 3166 +$URI::Heuristic::MY_COUNTRY = "uk"; +print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(org|co)\.uk/camel\.gif$,; +print "ok 11\n"; + +$URI::Heuristic::MY_COUNTRY = "gb"; +print "not " unless uf_urlstr("perl/camel.gif") =~ m,^http://www\.perl\.(org|co)\.uk/camel\.gif$,; +print "ok 12\n"; + +$ENV{URL_GUESS_PATTERN} = "www.ACME.org www.ACME.com"; +print "not " unless uf_urlstr("perl") eq "http://www.perl.org"; +print "ok 13\n"; + +{ + local $ENV{URL_GUESS_PATTERN} = ""; + print "not " unless uf_urlstr("perl") eq "http://perl"; + print "ok 14\n"; + + print "not " unless uf_urlstr("http:80") eq "http:80"; + print "ok 15\n"; + + print "not " unless uf_urlstr("mailto:gisle\@aas.no") eq "mailto:gisle\@aas.no"; + print "ok 16\n"; + + print "not " unless uf_urlstr("gisle\@aas.no") eq "mailto:gisle\@aas.no"; + print "ok 17\n"; + + print "not " unless uf_urlstr("Gisle.Aas\@aas.perl.org") eq "mailto:Gisle.Aas\@aas.perl.org"; + print "ok 18\n"; + + print "not " unless uf_url("gopher.sn.no")->scheme eq "gopher"; + print "ok 19\n"; + + print "not " unless uf_urlstr("123.3.3.3:8080/foo") eq "http://123.3.3.3:8080/foo"; + print "ok 20\n"; + + print "not " unless uf_urlstr("123.3.3.3:443/foo") eq "https://123.3.3.3:443/foo"; + print "ok 21\n"; + + print "not " unless uf_urlstr("123.3.3.3:21/foo") eq "ftp://123.3.3.3:21/foo"; + print "ok 22\n"; + + print "not " unless uf_url("FTP.example.com")->scheme eq "ftp"; + print "ok 23\n"; + + print "not " unless uf_url("ftp2.example.com")->scheme eq "ftp"; + print "ok 24\n"; + + print "not " unless uf_url("ftp")->scheme eq "ftp"; + print "ok 25\n"; + + print "not " unless uf_url("https.example.com")->scheme eq "https"; + print "ok 26\n"; +} diff --git a/t/http.t b/t/http.t new file mode 100644 index 0000000..fb30124 --- /dev/null +++ b/t/http.t @@ -0,0 +1,66 @@ +use strict; +use warnings; + +print "1..16\n"; + +use URI; + +my $u = URI->new(""); + +#print "$u\n"; +print "not " unless $u eq "http://www.perl.com/path?q=f%F4o"; +print "ok 1\n"; + +print "not " unless $u->port == 80; +print "ok 2\n"; + +# play with port +my $old = $u->port(8080); +print "not " unless $old == 80 && $u eq "http://www.perl.com:8080/path?q=f%F4o"; +print "ok 3\n"; + +$u->port(80); +print "not " unless $u eq "http://www.perl.com:80/path?q=f%F4o"; +print "ok 4\n"; + +$u->port(""); +print "not " unless $u eq "http://www.perl.com:/path?q=f%F4o" && $u->port == 80; +print "ok 5\n"; + +$u->port(undef); +print "not " unless $u eq "http://www.perl.com/path?q=f%F4o"; +print "ok 6\n"; + +my @q = $u->query_form; +print "not " unless @q == 2 && "@q" eq "q fôo"; +print "ok 7\n"; + +$u->query_form(foo => "bar", bar => "baz"); +print "not " unless $u->query eq "foo=bar&bar=baz"; +print "ok 8\n"; + +print "not " unless $u->host eq "www.perl.com"; +print "ok 9\n"; + +print "not " unless $u->path eq "/path"; +print "ok 10\n"; + +print "not " if $u->secure; +print "ok 11\n"; + +$u->scheme("https"); +print "not " unless $u->port == 443; +print "ok 12\n"; + +print "not " unless $u eq "https://www.perl.com/path?foo=bar&bar=baz"; +print "ok 13\n"; + +print "not " unless $u->secure; +print "ok 14\n"; + +$u = URI->new("http://%77%77%77%2e%70%65%72%6c%2e%63%6f%6d/%70%75%62/%61/%32%30%30%31/%30%38/%32%37/%62%6a%6f%72%6e%73%74%61%64%2e%68%74%6d%6c"); +print "not " unless $u->canonical eq "http://www.perl.com/pub/a/2001/08/27/bjornstad.html"; +print "ok 15\n"; + +print "not " unless $u->has_recognized_scheme; +print "ok 16\n"; diff --git a/t/idna.t b/t/idna.t new file mode 100644 index 0000000..da2ad98 --- /dev/null +++ b/t/idna.t @@ -0,0 +1,14 @@ +use strict; +use warnings; + +use utf8; +use Test::More tests => 7; +use URI::_idna; + +is URI::_idna::encode("www.example.com"), "www.example.com"; +is URI::_idna::decode("www.example.com"), "www.example.com"; +is URI::_idna::encode("www.example.com."), "www.example.com."; +is URI::_idna::decode("www.example.com."), "www.example.com."; +is URI::_idna::encode("Bücher.ch"), "xn--bcher-kva.ch"; +is URI::_idna::decode("xn--bcher-kva.ch"), "bücher.ch"; +is URI::_idna::decode("xn--bcher-KVA.ch"), "bücher.ch"; diff --git a/t/iri.t b/t/iri.t new file mode 100644 index 0000000..f1dfd51 --- /dev/null +++ b/t/iri.t @@ -0,0 +1,76 @@ +use strict; +use warnings; + +use utf8; +use Test::More; +use Config; + +if (defined $Config{useperlio}) { + plan tests=>26; +} else { + plan skip_all=>'this perl doesn\'t support PerlIO layers'; +} + +use URI; +use URI::IRI; + +my $u; + +binmode Test::More->builder->output, ':encoding(UTF-8)'; +binmode Test::More->builder->failure_output, ':encoding(UTF-8)'; + +$u = URI->new("http://Bücher.ch"); +is $u, "http://xn--bcher-kva.ch"; +is $u->host, "xn--bcher-kva.ch"; +is $u->ihost, "bücher.ch"; +is $u->as_iri, "http://bücher.ch"; + +$u = URI->new("http://example.com/Bücher"); +is $u, "http://example.com/B%C3%BCcher"; +is $u->as_iri, "http://example.com/Bücher"; + +$u = URI->new("http://example.com/B%FCcher"); # latin1 encoded stuff +is $u->as_iri, "http://example.com/B%FCcher"; # ...should not be decoded + +$u = URI->new("http://example.com/B\xFCcher"); +is $u->as_string, "http://example.com/B%FCcher"; +is $u->as_iri, "http://example.com/B%FCcher"; + +$u = URI::IRI->new("http://example.com/B\xFCcher"); +is $u->as_string, "http://example.com/Bücher"; +is $u->as_iri, "http://example.com/Bücher"; + +# draft-duerst-iri-bis.txt claims this should map to xn--rsum-bad.example.org +$u = URI->new("http://r\xE9sum\xE9.example.org"); +is $u->as_string, "http://xn--rsum-bpad.example.org"; + +$u = URI->new("http://xn--rsum-bad.example.org"); +is $u->as_iri, "http://r\x80sum\x80.example.org"; + +$u = URI->new("http://r%C3%A9sum%C3%A9.example.org"); +is $u->as_string, "http://r%C3%A9sum%C3%A9.example.org"; +is $u->as_iri, "http://r\xE9sum\xE9.example.org"; + +$u = URI->new("http://âž¡.ws/"); +is $u, "http://xn--hgi.ws/"; +is $u->host, "xn--hgi.ws"; +is $u->ihost, "âž¡.ws"; +is $u->as_iri, "http://âž¡.ws/"; + +# draft-duerst-iri-bis.txt examples (section 3.7.1): +is(URI->new("http://www.example.org/D%C3%BCrst")->as_iri, "http://www.example.org/D\xFCrst"); +is(URI->new("http://www.example.org/D%FCrst")->as_iri, "http://www.example.org/D%FCrst"); +TODO: { + local $TODO = "some chars (like U+202E, RIGHT-TO-LEFT OVERRIDE) need to stay escaped"; +is(URI->new("http://xn--99zt52a.example.org/%e2%80%ae")->as_iri, "http://\x{7D0D}\x{8C46}.example.org/%e2%80%ae"); +} + +# try some URLs that can't be IDNA encoded (fallback to encoded UTF8 bytes) +$u = URI->new("http://" . ("ü" x 128)); +is $u, "http://" . ("%C3%BC" x 128); +is $u->host, ("\xC3\xBC" x 128); +TODO: { + local $TODO = "should ihost decode UTF8 bytes?"; + is $u->ihost, ("ü" x 128); +} +is $u->as_iri, "http://" . ("ü" x 128); diff --git a/t/ldap.t b/t/ldap.t new file mode 100644 index 0000000..3cd3dd8 --- /dev/null +++ b/t/ldap.t @@ -0,0 +1,119 @@ +use strict; +use warnings; + +print "1..24\n"; + +use URI; + +my $uri; + +$uri = URI->new("ldap://host/dn=base?cn,sn?sub?objectClass=*"); + +print "not " unless $uri->host eq "host"; +print "ok 1\n"; + +print "not " unless $uri->dn eq "dn=base"; +print "ok 2\n"; + +print "not " unless join("-",$uri->attributes) eq "cn-sn"; +print "ok 3\n"; + +print "not " unless $uri->scope eq "sub"; +print "ok 4\n"; + +print "not " unless $uri->filter eq "objectClass=*"; +print "ok 5\n"; + +$uri = URI->new("ldap:"); +$uri->dn("o=University of Michigan,c=US"); + +print "not " unless "$uri" eq "ldap:o=University%20of%20Michigan,c=US" && + $uri->dn eq "o=University of Michigan,c=US"; +print "ok 6\n"; + +$uri->host("ldap.itd.umich.edu"); +print "not " unless $uri->as_string eq "ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US"; +print "ok 7\n"; + +# check defaults +print "not " unless $uri->_scope eq "" && + $uri->scope eq "base" && + $uri->_filter eq "" && + $uri->filter eq "(objectClass=*)"; +print "ok 8\n"; + +# attribute +$uri->attributes("postalAddress"); +print "not " unless $uri eq "ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US?postalAddress"; +print "ok 9\n"; + +# does attribute escapeing work as it should +$uri->attributes($uri->attributes, "foo", ",", "*", "?", "#", "\0"); + +print "not " unless $uri->attributes eq "postalAddress,foo,%2C,*,%3F,%23,%00" && + join("-", $uri->attributes) eq "postalAddress-foo-,-*-?-#-\0"; +print "ok 10\n"; +$uri->attributes(""); + +$uri->scope("sub?#"); +print "not " unless $uri->query eq "?sub%3F%23" && + $uri->scope eq "sub?#"; +print "ok 11\n"; +$uri->scope(""); + +$uri->filter("f=?,#"); +print "not " unless $uri->query eq "??f=%3F,%23" && + $uri->filter eq "f=?,#"; + +$uri->filter("(int=\\00\\00\\00\\04)"); +print "not " unless $uri->query eq "??(int=%5C00%5C00%5C00%5C04)"; +print "ok 12\n"; + + +print "ok 13\n"; +$uri->filter(""); + +$uri->extensions("!bindname" => "cn=Manager,co=Foo"); +my %ext = $uri->extensions; + +print "not " unless $uri->query eq "???!bindname=cn=Manager%2Cco=Foo" && + keys %ext == 1 && + $ext{"!bindname"} eq "cn=Manager,co=Foo"; +print "ok 14\n"; + +$uri = URI->new("ldap://LDAP-HOST:389/o=University%20of%20Michigan,c=US?postalAddress?base?ObjectClass=*?FOO=Bar,bindname=CN%3DManager%CO%3dFoo"); + +print "not " unless $uri->canonical eq "ldap://ldap-host/o=University%20of%20Michigan,c=US?postaladdress???foo=Bar,bindname=CN=Manager%CO=Foo"; +print "ok 15\n"; + +print "$uri\n"; +print $uri->canonical, "\n"; + +print "not " if $uri->secure; +print "ok 16\n"; + +$uri = URI->new("ldaps://host/dn=base?cn,sn?sub?objectClass=*"); + +print "not " unless $uri->host eq "host"; +print "ok 17\n"; +print "not " unless $uri->port eq 636; +print "ok 18\n"; +print "not " unless $uri->dn eq "dn=base"; +print "ok 19\n"; +print "not " unless $uri->secure; +print "ok 20\n"; + +$uri = URI->new("ldapi://%2Ftmp%2Fldap.sock/????x-mod=-w--w----"); +print "not " unless $uri->authority eq "%2Ftmp%2Fldap.sock"; +print "ok 21\n"; +print "not " unless $uri->un_path eq "/tmp/ldap.sock"; +print "ok 22\n"; + +$uri->un_path("/var/x\@foo:bar/"); +print "not " unless $uri eq "ldapi://%2Fvar%2Fx%40foo%3Abar%2F/????x-mod=-w--w----"; +print "ok 23\n"; + +%ext = $uri->extensions; +print "not " unless $ext{"x-mod"} eq "-w--w----"; +print "ok 24\n"; + diff --git a/t/mailto.t b/t/mailto.t new file mode 100644 index 0000000..f13a1f8 --- /dev/null +++ b/t/mailto.t @@ -0,0 +1,48 @@ +use strict; +use warnings; + +print "1..7\n"; + +use URI; + +my $u = URI->new('mailto:gisle@aas.no'); + +print "not " unless $u->to eq 'gisle@aas.no' && + $u eq 'mailto:gisle@aas.no'; +print "ok 1\n"; + +my $old = $u->to('larry@wall.org'); +print "not " unless $old eq 'gisle@aas.no' && + $u->to eq 'larry@wall.org' && + $u eq 'mailto:larry@wall.org'; +print "ok 2\n"; + +$u->to("?/#"); +print "not " unless $u->to eq "?/#" && + $u eq 'mailto:%3F/%23'; +print "ok 3\n"; + +my @h = $u->headers; +print "not " unless @h == 2 && "@h" eq "to ?/#"; +print "ok 4\n"; + +$u->headers(to => 'gisle@aas.no', + cc => 'gisle@ActiveState.com,larry@wall.org', + Subject => 'How do you do?', + garbage => '/;?#=&', +); + +@h = $u->headers; +print "not " unless $u->to eq 'gisle@aas.no' && + @h == 8 && + "@h" eq 'to gisle@aas.no cc gisle@ActiveState.com,larry@wall.org Subject How do you do? garbage /;?#=&'; +print "ok 5\n"; + +#print "$u\n"; +print "not " unless $u eq 'mailto:gisle@aas.no?cc=gisle%40ActiveState.com%2Clarry%40wall.org&Subject=How+do+you+do%3F&garbage=%2F%3B%3F%23%3D%26'; +print "ok 6\n"; + +$u = URI->new("mailto:"); +$u->to("gisle"); +print "not " unless $u eq 'mailto:gisle'; +print "ok 7\n"; diff --git a/t/mix.t b/t/mix.t new file mode 100644 index 0000000..b72942a --- /dev/null +++ b/t/mix.t @@ -0,0 +1,80 @@ +use strict; +use warnings; + +print "1..6\n"; + +# Test mixing of URI and URI::WithBase objects +use URI; +use URI::WithBase; +use URI::URL; + +my $str = "http://www.sn.no/"; +my $rel = "path/img.gif"; + +my $u = URI->new($str); +my $uw = URI::WithBase->new($str, "http:"); +my $uu = URI::URL->new($str); + +my $a = URI->new($rel, $u); +my $b = URI->new($rel, $uw); +my $c = URI->new($rel, $uu); +my $d = URI->new($rel, $str); + +sub Dump +{ + require Data::Dumper; + print Data::Dumper->Dump([$a, $b, $c, $d], [qw(a b c d)]); +} + +#Dump(); +print "not " unless $a->isa("URI") && + ref($b) eq ref($uw) && + ref($c) eq ref($uu) && + $d->isa("URI"); +print "ok 1\n"; + +print "not " if $b->base && $c->base; +print "ok 2\n"; + +$a = URI::URL->new($rel, $u); +$b = URI::URL->new($rel, $uw); +$c = URI::URL->new($rel, $uu); +$d = URI::URL->new($rel, $str); + +print "not " unless ref($a) eq "URI::URL" && + ref($b) eq "URI::URL" && + ref($c) eq "URI::URL" && + ref($d) eq "URI::URL"; +print "ok 3\n"; + +print "not " unless ref($b->base) eq ref($uw) && + $b->base eq $uw && + ref($c->base) eq ref($uu) && + $c->base eq $uu && + $d->base eq $str; +print "ok 4\n"; + + + +$a = URI->new($uu, $u); +$b = URI->new($uu, $uw); +$c = URI->new($uu, $uu); +$d = URI->new($uu, $str); + +#Dump(); +print "not " unless ref($a) eq ref($b) && + ref($b) eq ref($c) && + ref($c) eq ref($d) && + ref($d) eq ref($u); +print "ok 5\n"; + +$a = URI::URL->new($u, $u); +$b = URI::URL->new($u, $uw); +$c = URI::URL->new($u, $uu); +$d = URI::URL->new($u, $str); + +print "not " unless ref($a) eq "URI::URL" && + ref($b) eq "URI::URL" && + ref($c) eq "URI::URL" && + ref($d) eq "URI::URL"; +print "ok 6\n"; diff --git a/t/mms.t b/t/mms.t new file mode 100644 index 0000000..d3ac1d1 --- /dev/null +++ b/t/mms.t @@ -0,0 +1,38 @@ +use strict; +use warnings; + +print "1..8\n"; + +use URI; + +my $u = URI->new(""); + +#print "$u\n"; +print "not " unless $u eq "mms://66.250.188.13/KFOG_FM"; +print "ok 1\n"; + +print "not " unless $u->port == 1755; +print "ok 2\n"; + +# play with port +my $old = $u->port(8755); +print "not " unless $old == 1755 && $u eq "mms://66.250.188.13:8755/KFOG_FM"; +print "ok 3\n"; + +$u->port(1755); +print "not " unless $u eq "mms://66.250.188.13:1755/KFOG_FM"; +print "ok 4\n"; + +$u->port(""); +print "not " unless $u eq "mms://66.250.188.13:/KFOG_FM" && $u->port == 1755; +print "ok 5\n"; + +$u->port(undef); +print "not " unless $u eq "mms://66.250.188.13/KFOG_FM"; +print "ok 6\n"; + +print "not " unless $u->host eq "66.250.188.13"; +print "ok 7\n"; + +print "not " unless $u->path eq "/KFOG_FM"; +print "ok 8\n"; diff --git a/t/news.t b/t/news.t new file mode 100644 index 0000000..a009a9e --- /dev/null +++ b/t/news.t @@ -0,0 +1,51 @@ +use strict; +use warnings; + +print "1..7\n"; + +use URI; + +my $u = URI->new("news:comp.lang.perl.misc"); + +print "not " unless $u->group eq "comp.lang.perl.misc" && + !defined($u->message) && + $u->port == 119 && + $u eq "news:comp.lang.perl.misc"; +print "ok 1\n"; + + +$u->host("news.online.no"); +print "not " unless $u->group eq "comp.lang.perl.misc" && + $u->port == 119 && + $u eq "news://news.online.no/comp.lang.perl.misc"; +print "ok 2\n"; + +$u->group("no.perl", 1 => 10); +print "not " unless $u eq "news://news.online.no/no.perl/1-10"; +print "ok 3\n"; + +my @g = $u->group; +#print "G: @g\n"; +print "not " unless @g == 3 && "@g" eq "no.perl 1 10"; +print "ok 4\n"; + +$u->message('42@g.aas.no'); +#print "$u\n"; +print "not " unless $u->message eq '42@g.aas.no' && + !defined($u->group) && + $u eq 'news://news.online.no/42@g.aas.no'; +print "ok 5\n"; + + +$u = URI->new("nntp:no.perl"); +print "not " unless $u->group eq "no.perl" && + $u->port == 119; +print "ok 6\n"; + +$u = URI->new("snews://snews.online.no/no.perl"); + +print "not " unless $u->group eq "no.perl" && + $u->host eq "snews.online.no" && + $u->port == 563; +print "ok 7\n"; + diff --git a/t/num_eq.t b/t/num_eq.t new file mode 100644 index 0000000..066d84c --- /dev/null +++ b/t/num_eq.t @@ -0,0 +1,16 @@ +# Test URI's overloading of numeric comparison for checking object +# equality + +use strict; +use warnings; +use Test::More 'no_plan'; + +use URI; + +my $uri1 = URI->new("http://foo.com"); +my $uri2 = URI->new("http://foo.com"); + +# cmp_ok() has a bug/misfeature where it strips overloading +# before doing the comparison. So use a regular ok(). +ok $uri1 == $uri1, "=="; +ok $uri1 != $uri2, "!="; diff --git a/t/old-absconf.t b/t/old-absconf.t new file mode 100644 index 0000000..536f4d7 --- /dev/null +++ b/t/old-absconf.t @@ -0,0 +1,38 @@ +use strict; +use warnings; + +print "1..6\n"; + +use URI::URL qw(url); + +# Test configuration via some global variables. + +$URI::URL::ABS_REMOTE_LEADING_DOTS = 1; +$URI::URL::ABS_ALLOW_RELATIVE_SCHEME = 1; + +my $u1 = url("../../../../abc", "http://web/a/b"); + +print "not " unless $u1->abs->as_string eq "http://web/abc"; +print "ok 1\n"; + +{ + local $URI::URL::ABS_REMOTE_LEADING_DOTS; + print "not " unless $u1->abs->as_string eq "http://web/../../../abc"; + print "ok 2\n"; +} + + +$u1 = url("http:../../../../abc", "http://web/a/b"); +print "not " unless $u1->abs->as_string eq "http://web/abc"; +print "ok 3\n"; + +{ + local $URI::URL::ABS_ALLOW_RELATIVE_SCHEME; + print "not " unless $u1->abs->as_string eq "http:../../../../abc"; + print "ok 4\n"; + print "not " unless $u1->abs(undef,1)->as_string eq "http://web/abc"; + print "ok 5\n"; +} + +print "not " unless $u1->abs(undef,0)->as_string eq "http:../../../../abc"; +print "ok 6\n"; diff --git a/t/old-base.t b/t/old-base.t new file mode 100644 index 0000000..77b562b --- /dev/null +++ b/t/old-base.t @@ -0,0 +1,978 @@ +use strict; +use warnings; + +use Test::More; +use URI::URL qw(url); +use URI::Escape qw(uri_escape uri_unescape); +use File::Temp 'tempdir'; + +# want compatibility +use URI::file; +$URI::file::DEFAULT_AUTHORITY = undef; + + +package main; + +# Must ensure that there is no relative paths in @INC because we will +# chdir in the newlocal tests. +unless ($^O eq "MacOS") { +chomp(my $pwd = ($^O =~ /mswin32/i ? `cd` : $^O eq 'VMS' ? `show default` : `pwd`)); +if ($^O eq 'VMS') { + $pwd =~ s#^\s+##; + $pwd = VMS::Filespec::unixpath($pwd); + $pwd =~ s#/$##; +} +for (@INC) { + my $x = $_; + $x = VMS::Filespec::unixpath($x) if $^O eq 'VMS'; + next if $x =~ m|^/| or $^O =~ /os2|mswin32/i + and $x =~ m#^(\w:[\\/]|[\\/]{2})#; + note "Turn lib path $x into $pwd/$x\n"; + $_ = "$pwd/$x"; + +} +} + +$| = 1; + +# Do basic tests first. + +note "Self tests for URI::URL version $URI::URL::VERSION...\n"; + +subtest 'scheme tests' => \&scheme_parse_test; + +subtest 'parts test' => \&parts_test; + +subtest 'escape test' => \&escape_test; + +subtest 'newlocal test' => \&newlocal_test; + +subtest 'Test relative/absolute URI::URL parsing' => \&absolute_test; + +subtest 'eq test' => \&eq_test; + +# Let's test making our own things +URI::URL::strict(0); +# This should work after URI::URL::strict(0) +my $url = new URI::URL "x-myscheme:something"; +# Since no implementor is registered for 'x-myscheme' then it will +# be handled by the URI::URL::_generic class +is($url->as_string, 'x-myscheme:something', ref($url) . '->as_string'); +is($url->path, 'something', ref($url) . '->path'); +URI::URL::strict(1); + +=comment + +# Let's try to make our URL subclass +{ + package MyURL; + @ISA = URI::URL::implementor(); + + sub _parse { + my($self, $init) = @_; + $self->URI::URL::_generic::_parse($init, qw(netloc path)); + } + + sub foo { + my $self = shift; + print ref($self)."->foo called for $self\n"; + } +} +# Let's say that it implements the 'x-a+b.c' scheme (alias 'x-foo') +URI::URL::implementor('x-a+b.c', 'MyURL'); +URI::URL::implementor('x-foo', 'MyURL'); + +# Now we are ready to try our new URL scheme +$url = new URI::URL 'x-a+b.c://foo/bar;a?b'; +is($url->as_string, 'x-a+b.c://foo/bar;a?b', ref($url) . '->as_string'); +is($url->path, '/bar;a?b', ref($url) . '->path'); +$url->foo; +$newurl = new URI::URL 'xxx', $url; +$newurl->foo; +$url = new URI::URL 'yyy', 'x-foo:'; +$url->foo; + +=cut + +# Test the new wash&go constructor +is(url("../foo.html", "http://www.sn.no/a/b")->abs->as_string, + 'http://www.sn.no/foo.html', 'wash&go'); + +note "URI::URL version $URI::URL::VERSION ok\n"; + +done_testing; +exit 0; + + + + +##################################################################### +# +# scheme_parse_test() +# +# test parsing and retrieval methods + +sub scheme_parse_test { + + my $tests = { + 'hTTp://web1.net/a/b/c/welcome#intro' + => { 'scheme'=>'http', 'host'=>'web1.net', 'port'=>80, + 'path'=>'/a/b/c/welcome', 'frag'=>'intro','query'=>undef, + 'epath'=>'/a/b/c/welcome', 'equery'=>undef, + 'params'=>undef, 'eparams'=>undef, + 'as_string'=>'http://web1.net/a/b/c/welcome#intro', + 'full_path' => '/a/b/c/welcome' }, + + 'http://web:1/a?query+text' + => { 'scheme'=>'http', 'host'=>'web', 'port'=>1, + 'path'=>'/a', 'frag'=>undef, 'query'=>'query+text' }, + + 'http://web.net/' + => { 'scheme'=>'http', 'host'=>'web.net', 'port'=>80, + 'path'=>'/', 'frag'=>undef, 'query'=>undef, + 'full_path' => '/', + 'as_string' => 'http://web.net/' }, + + 'http://web.net' + => { 'scheme'=>'http', 'host'=>'web.net', 'port'=>80, + 'path'=>'/', 'frag'=>undef, 'query'=>undef, + 'full_path' => '/', + 'as_string' => 'http://web.net/' }, + + 'http:0' + => { 'scheme'=>'http', 'path'=>'0', 'query'=>undef, + 'as_string'=>'http:0', 'full_path'=>'0', }, + + 'http:/0?0' + => { 'scheme'=>'http', 'path'=>'/0', 'query'=>'0', + 'as_string'=>'http:/0?0', 'full_path'=>'/0?0', }, + + 'http://0:0/0/0;0?0#0' + => { 'scheme'=>'http', 'host'=>'0', 'port'=>'0', + 'path' => '/0/0', 'query'=>'0', 'params'=>'0', + 'netloc'=>'0:0', + 'frag'=>0, 'as_string'=>'http://0:0/0/0;0?0#0' }, + + 'ftp://0%3A:%40@h:0/0?0' + => { 'scheme'=>'ftp', 'user'=>'0:', 'password'=>'@', + 'host'=>'h', 'port'=>'0', 'path'=>'/0?0', + 'query'=>'0', params=>undef, + 'netloc'=>'0%3A:%40@h:0', + 'as_string'=>'ftp://0%3A:%40@h:0/0?0' }, + + 'ftp://usr:pswd@web:1234/a/b;type=i' + => { 'host'=>'web', 'port'=>1234, 'path'=>'/a/b', + 'user'=>'usr', 'password'=>'pswd', + 'params'=>'type=i', + 'as_string'=>'ftp://usr:pswd@web:1234/a/b;type=i' }, + + 'ftp://host/a/b' + => { 'host'=>'host', 'port'=>21, 'path'=>'/a/b', + 'user'=>'anonymous', + 'as_string'=>'ftp://host/a/b' }, + + 'file://host/fseg/fs?g/fseg' + # don't escape ? for file: scheme + => { 'host'=>'host', 'path'=>'/fseg/fs', + 'as_string'=>'file://host/fseg/fs?g/fseg' }, + + 'gopher://host' + => { 'gtype'=>'1', 'as_string' => 'gopher://host', }, + + 'gopher://host/' + => { 'gtype'=>'1', 'as_string' => 'gopher://host/', }, + + 'gopher://gopher/2a_selector' + => { 'gtype'=>'2', 'selector'=>'a_selector', + 'as_string' => 'gopher://gopher/2a_selector', }, + + 'mailto:libwww-perl@ics.uci.edu' + => { 'address' => 'libwww-perl@ics.uci.edu', + 'encoded822addr'=> 'libwww-perl@ics.uci.edu', +# 'user' => 'libwww-perl', +# 'host' => 'ics.uci.edu', + 'as_string' => 'mailto:libwww-perl@ics.uci.edu', }, + + 'news:*' + => { 'groupart'=>'*', 'group'=>'*', as_string=>'news:*' }, + 'news:comp.lang.perl' + => { 'group'=>'comp.lang.perl' }, + 'news:perl-faq/module-list-1-794455075@ig.co.uk' + => { 'article'=> + 'perl-faq/module-list-1-794455075@ig.co.uk' }, + + 'nntp://news.com/comp.lang.perl/42' + => { 'group'=>'comp.lang.perl', }, #'digits'=>42 }, + + 'telnet://usr:pswd@web:12345/' + => { 'user'=>'usr', 'password'=>'pswd', 'host'=>'web' }, + 'rlogin://aas@a.sn.no' + => { 'user'=>'aas', 'host'=>'a.sn.no' }, +# 'tn3270://aas@ibm' +# => { 'user'=>'aas', 'host'=>'ibm', +# 'as_string'=>'tn3270://aas@ibm/'}, + +# 'wais://web.net/db' +# => { 'database'=>'db' }, +# 'wais://web.net/db?query' +# => { 'database'=>'db', 'query'=>'query' }, +# 'wais://usr:pswd@web.net/db/wt/wp' +# => { 'database'=>'db', 'wtype'=>'wt', 'wpath'=>'wp', +# 'password'=>'pswd' }, + }; + + foreach my $url_str (sort keys %$tests ){ + note "Testing '$url_str'\n"; + my $url = new URI::URL $url_str; + my $tests = $tests->{$url_str}; + while( my ($method, $exp) = each %$tests ){ + is($url->$method, $exp, ref($url) . "->$method"); + } + } +} + + +##################################################################### +# +# parts_test() (calls netloc_test test) +# +# Test individual component part access functions +# +sub parts_test { + + # test storage part access/edit methods (netloc, user, password, + # host and port are tested by &netloc_test) + + $url = new URI::URL 'file://web/orig/path'; + $url->scheme('http'); + $url->path('1info'); + $url->query('key words'); + $url->frag('this'); + is($url->as_string, 'http://web/1info?key%20words#this', ref($url) . '->as_string'); + + $url->epath('%2f/%2f'); + $url->equery('a=%26'); + is($url->full_path, '/%2f/%2f?a=%26', ref($url) . '->full_path'); + + # At this point it should be impossible to access the members path() + # and query() without complaints. + eval { my $p = $url->path; note "Path is $p\n"; }; + fail "Path exception failed" unless $@; + eval { my $p = $url->query; note "Query is $p\n"; }; + fail "Query exception failed" unless $@; + + # but we should still be able to set it + $url->path("howdy"); + is($url->as_string, 'http://web/howdy?a=%26#this', ref($url) . '->as_string'); + + # Test the path_components function + $url = new URI::URL 'file:%2f/%2f'; + my $p; + $p = join('-', $url->path_components); + fail "\$url->path_components returns '$p', expected '/-/'" + unless $p eq "/-/"; + $url->host("localhost"); + $p = join('-', $url->path_components); + fail "\$url->path_components returns '$p', expected '-/-/'" + unless $p eq "-/-/"; + $url->epath("/foo/bar/"); + $p = join('-', $url->path_components); + fail "\$url->path_components returns '$p', expected '-foo-bar-'" + unless $p eq "-foo-bar-"; + $url->path_components("", "/etc", "\0", "..", "øse", ""); + is($url->full_path, '/%2Fetc/%00/../%F8se/', ref($url) . '->full_path'); + + # Setting undef + $url = new URI::URL 'http://web/p;p?q#f'; + $url->epath(undef); + $url->equery(undef); + $url->eparams(undef); + $url->frag(undef); + is($url->as_string, 'http://web', ref($url) . '->as_string'); + + # Test http query access methods + $url->keywords('dog'); + is($url->as_string, 'http://web?dog', ref($url) . '->as_string'); + $url->keywords(qw(dog bones)); + is($url->as_string, 'http://web?dog+bones', ref($url) . '->as_string'); + $url->keywords(0,0); + is($url->as_string, 'http://web?0+0', ref($url) . '->as_string'); + $url->keywords('dog', 'bones', '#+='); + is($url->as_string, 'http://web?dog+bones+%23%2B%3D', ref($url) . '->as_string'); + $a = join(":", $url->keywords); + is($a, 'dog:bones:#+=', "\$url->keywords"); + # calling query_form is an error +# eval { my $foo = $url->query_form; }; +# fail "\$url->query_form should croak since query contains keywords not a form." +# unless $@; + + $url->query_form(a => 'foo', b => 'bar'); + is($url->as_string, 'http://web?a=foo&b=bar', ref($url) . '->as_string'); + my %a = $url->query_form; + is_deeply( + \%a, + { a => 'foo', b => 'bar' }, + "\$url->query_form", + ); + + $url->query_form(a => undef, a => 'foo', '&=' => '&=+'); + is($url->as_string, 'http://web?a=&a=foo&%26%3D=%26%3D%2B', ref($url) . '->as_string'); + + my @a = $url->query_form; + is(scalar(@a), 6, 'length'); + is_deeply( + \@a, + [ + 'a', '', + 'a', 'foo', + '&=', '&=+', + ], + 'query_form', + ); + + # calling keywords is an error +# eval { my $foo = $url->keywords; }; +# die "\$url->keywords should croak when query is a form" +# unless $@; + # Try this odd one + $url->equery('&=&=b&a=&a&a=b=c&&a=b'); + @a = $url->query_form; + #note join(":", @a), "\n"; + is(scalar(@a), 16, 'length'); + ok( + $a[4] eq "" && $a[5] eq "b" && $a[10] eq "a" && $a[11] eq "b=c", + 'sequence', + ); + + # Try array ref values in the key value pairs + $url->query_form(a => ['foo', 'bar'], b => 'foo', c => ['bar', 'foo']); + is($url->as_string, 'http://web?a=foo&a=bar&b=foo&c=bar&c=foo', ref($url) . '->as_string'); + + subtest 'netloc_test' => \&netloc_test; + subtest 'port_test' => \&port_test; + + $url->query(undef); + is($url->query, undef, ref($url) . '->as_string'); + + $url = new URI::URL 'gopher://gopher/'; + $url->port(33); + $url->gtype("3"); + $url->selector("S"); + $url->search("query"); + is($url->as_string, 'gopher://gopher:33/3S%09query', ref($url) . '->as_string'); + + $url->epath("45%09a"); + is($url->gtype, '4', ref($url) . '->as_string'); + is($url->selector, '5', ref($url) . '->as_string'); + is($url->search, 'a', ref($url) . '->as_string'); + is($url->string, undef, ref($url) . '->as_string'); + is($url->path, "/45\ta", ref($url) . '->as_string'); +# $url->path("00\t%09gisle"); +# is($url->search '%09gisle', ref($url) . '->search'); + + # Let's test som other URL schemes + $url = new URI::URL 'news:'; + $url->group("comp.lang.perl.misc"); + is($url->as_string, 'news:comp.lang.perl.misc', ref($url) . '->as_string'); + $url->article('<1234@a.sn.no>'); + is($url->as_string, 'news:1234@a.sn.no', ref($url) . '->as_string: "<" and ">" are gone'); + + # This one should be illegal + eval { $url->article("no.perl"); }; + die "This one should really complain" unless $@; + +# $url = new URI::URL 'mailto:'; +# $url->user("aas"); +# $url->host("a.sn.no"); +# is($url->as_string, 'mailto:aas@a.sn.no', ref($url) . '->as_string'); +# $url->address('foo@bar'); +# is($url->host, 'bar', ref($url) . '->as_string'); +# is($url->user, 'foo', ref($url) . '->as_string'); + +# $url = new URI::URL 'wais://host/database/wt/wpath'; +# $url->database('foo'); +# is($url->as_string, 'wais://host/foo/wt/wpath', ref($url) . '->as_string'); +# $url->wtype('bar'); +# is($url->as_string, 'wais://host/foo/bar/wpath', ref($url) . '->as_string'); + + # Test crack method for various URLs + my(@crack, $crack); + @crack = URI::URL->new("http://host/path;param?query#frag")->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "http*UNDEF*UNDEF*host*80*/path*param*query*frag", 'crack result'); + + @crack = URI::URL->new("foo/bar", "ftp://aas\@ftp.sn.no/")->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; +# die "Bad crack result" unless +# $crack eq "ftp*UNDEF*UNDEF*UNDEF*21*foo/bar*UNDEF*UNDEF*UNDEF"; + + @crack = URI::URL->new('ftp://u:p@host/q?path')->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "ftp*u*p*host*21*/q?path*UNDEF*path*UNDEF", 'crack result'); + + @crack = URI::URL->new("ftp://ftp.sn.no/pub")->crack; # Test anon ftp + is(scalar(@crack), 9, '9 elements'); + ok($crack[2], "passwd in anonymous crack"); + $crack[2] = 'passwd'; # easier to test when we know what it is + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "ftp*anonymous*passwd*ftp.sn.no*21*/pub*UNDEF*UNDEF*UNDEF", 'crack result'); + + @crack = URI::URL->new('mailto:aas@sn.no')->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; +# die "Bad crack result" unless +# $crack eq "mailto*aas*UNDEF*sn.no*UNDEF*aas\@sn.no*UNDEF*UNDEF*UNDEF"; + + @crack = URI::URL->new('news:comp.lang.perl.misc')->crack; + is(scalar(@crack), 9, '9 elements'); + $crack = join("*", map { defined($_) ? $_ : "UNDEF" } @crack); + note "Cracked result: $crack"; + is($crack, "news*UNDEF*UNDEF*UNDEF*119*comp.lang.perl.misc*UNDEF*UNDEF*UNDEF", 'crack result'); +} + +# +# netloc_test() +# +# Test automatic netloc synchronisation +# +sub netloc_test { + + my $url = new URI::URL 'ftp://anonymous:p%61ss@håst:12345'; + is($url->user, 'anonymous', ref($url) . '->as_string'); + is($url->password, 'pass', ref($url) . '->as_string'); + is($url->host, 'xn--hst-ula', ref($url) . '->as_string'); + is($url->port, 12345, ref($url) . '->as_string'); + # Can't really know how netloc is represented since it is partially escaped + #is($url->netloc, 'anonymous:pass@hst:12345', ref($url) . '->as_string'); + is($url->as_string, 'ftp://anonymous:pass@xn--hst-ula:12345', ref($url) . '->as_string'); + + # The '0' is sometimes tricky to get right + $url->user(0); + $url->password(0); + $url->host(0); + $url->port(0); + is($url->netloc, '0:0@0:0', ref($url) . '->as_string'); + $url->host(undef); + is($url->netloc, '0:0@:0', ref($url) . '->as_string'); + $url->host('h'); + $url->user(undef); + is($url->netloc, ':0@h:0', ref($url) . '->as_string'); + $url->user(''); + is($url->netloc, ':0@h:0', ref($url) . '->as_string'); + $url->password(''); + is($url->netloc, ':@h:0', ref($url) . '->as_string'); + $url->user('foo'); + is($url->netloc, 'foo:@h:0', ref($url) . '->as_string'); + + # Let's try a simple one + $url->user('nemo'); + $url->password('p2'); + $url->host('hst2'); + $url->port(2); + is($url->netloc, 'nemo:p2@hst2:2', ref($url) . '->as_string'); + + $url->user(undef); + $url->password(undef); + $url->port(undef); + is($url->netloc, 'hst2', ref($url) . '->as_string'); + is($url->port, '21', ref($url) . '->as_string'); # the default ftp port + + $url->port(21); + is($url->netloc, 'hst2:21', ref($url) . '->as_string'); + + # Let's try some reserved chars + $url->user("@"); + $url->password(":-#-;-/-?"); + is($url->as_string, 'ftp://%40::-%23-;-%2F-%3F@hst2:21', ref($url) . '->as_string'); + +} + +# +# port_test() +# +# Test port behaviour +# +sub port_test { + $url = URI::URL->new('http://foo/root/dir/'); + my $port = $url->port; + is($port, 80, 'port'); + is($url->as_string, 'http://foo/root/dir/', 'string'); + + $url->port(8001); + $port = $url->port; + is($port, 8001, 'port'); + is($url->as_string, 'http://foo:8001/root/dir/', 'string'); + + $url->port(80); + $port = $url->port; + is($port, 80, 'port'); + is($url->canonical->as_string, 'http://foo/root/dir/', 'string'); + + $url->port(8001); + $url->port(undef); + $port = $url->port; + is($port, 80, 'port'); + is($url->canonical->as_string, 'http://foo/root/dir/', 'string'); +} + + +##################################################################### +# +# escape_test() +# +# escaping functions + +sub escape_test { + # supply escaped URL + $url = new URI::URL 'http://web/this%20has%20spaces'; + # check component is unescaped + is($url->path, '/this has spaces', ref($url) . '->as_string'); + + # modify the unescaped form + $url->path('this ALSO has spaces'); + # check whole url is escaped + is($url->as_string, + 'http://web/this%20ALSO%20has%20spaces', ref($url) . '->as_string'); + + $url = new URI::URL uri_escape('http://web/try %?#" those'); + is($url->as_string, + 'http%3A%2F%2Fweb%2Ftry%20%25%3F%23%22%20those', ref($url) . '->as_string'); + + my $all = pack('C*',0..255); + my $esc = uri_escape($all); + my $new = uri_unescape($esc); + is($all, $new, "uri_escape->uri_unescape"), + + $url->path($all); + is($url->full_path, q(%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23$%&'()*+,-./0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF), ref($url) . '->as_string'); + + # test escaping uses uppercase (preferred by rfc1837) + $url = new URI::URL 'file://h/'; + $url->path(chr(0x7F)); + is($url->as_string, 'file://h/%7F', ref($url) . '->as_string'); + + return; + # reserved characters differ per scheme + + ## XXX is this '?' allowed to be unescaped + $url = new URI::URL 'file://h/test?ing'; + is($url->path, '/test?ing', ref($url) . '->as_string'); + + $url = new URI::URL 'file://h/'; + $url->epath('question?mark'); + is($url->as_string, 'file://h/question?mark', ref($url) . '->as_string'); + # XXX Why should this be any different??? + # Perhaps we should not expect too much :-) + $url->path('question?mark'); + is($url->as_string, 'file://h/question%3Fmark', ref($url) . '->as_string'); + + # See what happens when set different elements to this ugly sting + my $reserved = ';/?:@&=#%'; + $url->path($reserved . "foo"); + is($url->as_string, 'file://h/%3B/%3F%3A%40%26%3D%23%25foo', ref($url) . '->as_string'); + + $url->scheme('http'); + $url->path(''); + is($url->as_string, 'http://h/', ref($url) . '->as_string'); + $url->query($reserved); + $url->params($reserved); + $url->frag($reserved); + is($url->as_string, 'http://h/;%3B%2F%3F%3A%40&=%23%25?%3B%2F%3F%3A%40&=%23%25#;/?:@&=#%', ref($url) . '->as_string'); + + my $str = $url->as_string; + $url = new URI::URL $str; + die "URL changed" if $str ne $url->as_string; + + $url = new URI::URL 'ftp:foo'; + $url->user($reserved); + $url->host($reserved); + is($url->as_string, 'ftp://%3B%2F%3F%3A%40%26%3D%23%25@%3B%2F%3F%3A%40%26%3D%23%25/foo', ref($url) . '->as_string'); + +} + + +##################################################################### +# +# newlocal_test() +# + +sub newlocal_test { + return 1 if $^O eq "MacOS"; + + my $isMSWin32 = ($^O =~ /MSWin32/i); + my $pwd = ($isMSWin32 ? 'cd' : + ($^O eq 'qnx' ? '/usr/bin/fullpath -t' : + ($^O eq 'VMS' ? 'show default' : + (-e '/bin/pwd' ? '/bin/pwd' : 'pwd')))); + my $tmpdir = tempdir(); + if ( $^O eq 'qnx' ) { + $tmpdir = `/usr/bin/fullpath -t $tmpdir`; + chomp $tmpdir; + } + $tmpdir = '/sys$scratch' if $^O eq 'VMS'; + $tmpdir =~ tr|\\|/|; + + my $savedir = `$pwd`; # we don't use Cwd.pm because we want to check + # that it get require'd correctly by URL.pm + chomp $savedir; + if ($^O eq 'VMS') { + $savedir =~ s#^\s+##; + $savedir = VMS::Filespec::unixpath($savedir); + $savedir =~ s#/$##; + } + + # cwd + chdir($tmpdir) or die $!; + my $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + if ($^O eq 'VMS') { + $dir =~ s#^\s+##; + $dir = VMS::Filespec::unixpath($dir); + $dir =~ s#/$##; + } + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL; + my $ss = $isMSWin32 ? '//' : (($dir =~ m,^/,) ? '' : '///' ); + is($url->as_string, URI::URL->new("file:$ss$dir/")->as_string, ref($url) . '->as_string'); + + note "Local directory is ". $url->local_path . "\n"; + + if ($^O ne 'VMS') { + # absolute dir + chdir('/') or die $!; + $url = newlocal URI::URL '/usr/'; + is($url->as_string, 'file:/usr/', ref($url) . '->as_string'); + + # absolute file + $url = newlocal URI::URL '/vmunix'; + is($url->as_string, 'file:/vmunix', ref($url) . '->as_string'); + } + + # relative file + chdir($tmpdir) or fail $!; + $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + if ($^O eq 'VMS') { + $dir =~ s#^\s+##; + $dir = VMS::Filespec::unixpath($dir); + $dir =~ s#/$##; + } + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL 'foo'; + is($url->as_string, "file:$ss$dir/foo", ref($url) . '->as_string'); + + # relative dir + chdir($tmpdir) or fail $!; + $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + if ($^O eq 'VMS') { + $dir =~ s#^\s+##; + $dir = VMS::Filespec::unixpath($dir); + $dir =~ s#/$##; + } + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL 'bar/'; + is($url->as_string, "file:$ss$dir/bar/", ref($url) . '->as_string'); + + # 0 + if ($^O ne 'VMS') { + chdir('/') or fail $!; + $dir = `$pwd`; $dir =~ tr|\\|/|; + chomp $dir; + $dir = uri_escape($dir, ':'); + $dir =~ s/^(\w)%3A/$1:/ if $isMSWin32 or $^O eq 'os2'; + $url = newlocal URI::URL '0'; + is($url->as_string, "file:$ss${dir}0", ref($url) . '->as_string'); + } + + # Test access methods for file URLs + $url = new URI::URL 'file:/c:/dos'; + is($url->dos_path, 'C:\\DOS', ref($url) . '->as_string'); + is($url->unix_path, '/c:/dos', ref($url) . '->as_string'); + #is($url->vms_path, '[C:]DOS', ref($url) . '->as_string'); + is($url->mac_path, undef, ref($url) . '->as_string'); + + $url = new URI::URL 'file:/foo/bar'; + is($url->unix_path, '/foo/bar', ref($url) . '->as_string'); + is($url->mac_path, 'foo:bar', ref($url) . '->as_string'); + + # Some edge cases +# $url = new URI::URL 'file:'; +# is($url->unix_path, '/', ref($url) . '->as_string'); + $url = new URI::URL 'file:/'; + is($url->unix_path, '/', ref($url) . '->as_string'); + $url = new URI::URL 'file:.'; + is($url->unix_path, '.', ref($url) . '->as_string'); + $url = new URI::URL 'file:./foo'; + is($url->unix_path, './foo', ref($url) . '->as_string'); + $url = new URI::URL 'file:0'; + is($url->unix_path, '0', ref($url) . '->as_string'); + $url = new URI::URL 'file:../../foo'; + is($url->unix_path, '../../foo', ref($url) . '->as_string'); + $url = new URI::URL 'file:foo/../bar'; + is($url->unix_path, 'foo/../bar', ref($url) . '->as_string'); + + # Relative files + $url = new URI::URL 'file:foo/b%61r/Note.txt'; + is($url->unix_path, 'foo/bar/Note.txt', ref($url) . '->as_string'); + is($url->mac_path, ':foo:bar:Note.txt', ref($url) . '->as_string'); + is($url->dos_path, 'FOO\\BAR\\NOTE.TXT', ref($url) . '->as_string'); + #is($url->vms_path', '[.FOO.BAR]NOTE.TXT', ref($url) . '->as_string'); + + # The VMS path found in RFC 1738 (section 3.10) + $url = new URI::URL 'file://vms.host.edu/disk$user/my/notes/note12345.txt'; +# is($url->vms_path, 'DISK$USER:[MY.NOTES]NOTE12345.TXT', ref($url) . '->as_string'); +# is($url->mac_path, 'disk$user:my:notes:note12345.txt', ref($url) . '->as_string'); + + chdir($savedir) or fail $!; +} + + +##################################################################### +# +# absolute_test() +# +sub absolute_test { + # Tests from draft-ietf-uri-relative-url-06.txt + # Copied verbatim from the draft, parsed below + + @URI::URL::g::ISA = qw(URI::URL::_generic); # for these tests + + my $base = 'http://a/b/c/d;p?q#f'; + + my $absolute_tests = < + g = + ./g = + g/ = + /g = + //g = +# ?y = + g?y = + g?y/./x = + #s = + g#s = + g#s/./x = + g?y#s = + # ;x = + g;x = + g;x?y#s = + . = + ./ = + .. = + ../ = + ../g = + ../.. = + ../../ = + ../../g = + +5.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur + in normal practice, all URL parsers should be capable of resolving + them consistently. Each example uses the same base as above. + + An empty reference resolves to the complete base URL: + + <> = + + Parsers must be careful in handling the case where there are more + relative path ".." segments than there are hierarchical levels in + the base URL's path. Note that the ".." syntax cannot be used to + change the of a URL. + + ../../../g = + ../../../../g = + + Similarly, parsers must avoid treating "." and ".." as special + when they are not complete components of a relative path. + + /./g = + /../g = + g. = + .g = + g.. = + ..g = + + Less likely are cases where the relative URL uses unnecessary or + nonsensical forms of the "." and ".." complete path segments. + + ./../g = + ./g/. = + g/./h = + g/../h = + + Finally, some older parsers allow the scheme name to be present in + a relative URL if it is the same as the base URL scheme. This is + considered to be a loophole in prior specifications of partial + URLs [1] and should be avoided by future parsers. + + http:g = + http: = +EOM + # convert text to list like + # @absolute_tests = ( ['g:h' => 'g:h'], ...) + + my @absolute_tests; + for my $line (split("\n", $absolute_tests)) { + next unless $line =~ /^\s{6}/; + if ($line =~ /^\s+(\S+)\s*=\s*]*)>/) { + my($rel, $abs) = ($1, $2); + $rel = '' if $rel eq '<>'; + push(@absolute_tests, [$rel, $abs]); + } + else { + warn "illegal line '$line'"; + } + } + + # add some extra ones for good measure + + push(@absolute_tests, ['x/y//../z' => 'http://a/b/c/x/y/z'], + ['1' => 'http://a/b/c/1' ], + ['0' => 'http://a/b/c/0' ], + ['/0' => 'http://a/0' ], +# ['%2e/a' => 'http://a/b/c/%2e/a'], # %2e is '.' +# ['%2e%2e/a' => 'http://a/b/c/%2e%2e/a'], + ); + + note " Relative + Base => Expected Absolute URL"; + note "================================================\n"; + for my $test (@absolute_tests) { + my($rel, $abs) = @$test; + my $abs_url = new URI::URL $abs; + my $abs_str = $abs_url->as_string; + + note sprintf(" %-10s + $base => %s", $rel, $abs); + my $u = new URI::URL $rel, $base; + my $got = $u->abs; + is($got->as_string, $abs_str, ref($url) . '->as_string'); + } + + # bug found and fixed in 1.9 by "J.E. Fritz" + $base = new URI::URL 'http://host/directory/file'; + my $relative = new URI::URL 'file', $base; + my $result = $relative->abs; + + my ($a, $b) = ($base->path, $result->path); + is($a, $b, 'identity'); + + # Counter the expectation of least surprise, + # section 6 of the draft says the URL should + # be canonicalised, rather than making a simple + # substitution of the last component. + # Better doublecheck someone hasn't "fixed this bug" :-) + $base = new URI::URL 'http://host/dir1/../dir2/file'; + $relative = new URI::URL 'file', $base; + $result = $relative->abs; + is($result, 'http://host/dir2/file', 'URL canonicalised'); + + note "--------"; + # Test various other kinds of URLs and how they like to be absolutized + for (["http://abc/", "news:45664545", "http://abc/"], + ["news:abc", "http://abc/", "news:abc"], + ["abc", "file:/test?aas", "file:/abc"], +# ["gopher:", "", "gopher:"], +# ["?foo", "http://abc/a", "http://abc/a?foo"], + ["?foo", "file:/abc", "file:/abc?foo"], + ["#foo", "http://abc/a", "http://abc/a#foo"], + ["#foo", "file:a", "file:a#foo"], + ["#foo", "file:/a", "file:/a#foo"], + ["#foo", "file:/a", "file:/a#foo"], + ["#foo", "file://localhost/a", "file://localhost/a#foo"], + ['123@sn.no', "news:comp.lang.perl.misc", 'news:/123@sn.no'], + ['no.perl', 'news:123@sn.no', 'news:/no.perl'], + ['mailto:aas@a.sn.no', "http://www.sn.no/", 'mailto:aas@a.sn.no'], + + # Test absolutizing with old behaviour. + ['http:foo', 'http://h/a/b', 'http://h/a/foo'], + ['http:/foo', 'http://h/a/b', 'http://h/foo'], + ['http:?foo', 'http://h/a/b', 'http://h/a/b?foo'], + ['http:#foo', 'http://h/a/b', 'http://h/a/b#foo'], + ['http:?foo#bar','http://h/a/b', 'http://h/a/b?foo#bar'], + ['file:/foo', 'http://h/a/b', 'file:/foo'], + + ) + { + my($url, $base, $expected_abs) = @$_; + my $rel = new URI::URL $url, $base; + my $abs = $rel->abs($base, 1); + note sprintf(" %-12s+ $base => %s", $rel, $abs); + is($abs->as_string, $expected_abs, ref($url) . '->as_string'); + } + note "absolute test ok\n"; + + # Test relative function + for ( + ["http://abc/a", "http://abc", "a"], + ["http://abc/a", "http://abc/b", "a"], + ["http://abc/a?q", "http://abc/b", "a?q"], + ["http://abc/a;p", "http://abc/b", "a;p"], + ["http://abc/a", "http://abc/a/b/c/", "../../../a"], + ["http://abc/a/", "http://abc/a/", "./"], + ["http://abc/a#f", "http://abc/a", "#f"], + + ["file:/etc/motd", "file:/", "etc/motd"], + ["file:/etc/motd", "file:/etc/passwd", "motd"], + ["file:/etc/motd", "file:/etc/rc2.d/", "../motd"], + ["file:/etc/motd", "file:/usr/lib/doc", "../../etc/motd"], + ["file:", "file:/etc/", "../"], + ["file:foo", "file:/etc/", "../foo"], + + ["mailto:aas", "http://abc", "mailto:aas"], + + # Nicolai Langfeldt's original example + ["http://www.math.uio.no/doc/mail/top.html", + "http://www.math.uio.no/doc/linux/", "../mail/top.html"], + ) + { + my($abs, $base, $expect) = @$_; + my $rel = URI::URL->new($abs, $base)->rel; + is($rel->as_string, $expect, "url('$abs', '$base')->rel = '$expect'"); + } + note "relative test ok\n"; +} + + +sub eq_test +{ + my $u1 = new URI::URL 'http://abc.com:80/~smith/home.html'; + my $u2 = new URI::URL 'http://ABC.com/%7Esmith/home.html'; + my $u3 = new URI::URL 'http://ABC.com:/%7esmith/home.html'; + + # Test all permutations of these tree + ok($u1->eq($u2), "1: $u1 ne $u2"); + ok($u1->eq($u3), "2: $u1 ne $u3"); + ok($u2->eq($u1), "3: $u2 ne $u1"); + ok($u2->eq($u3), "4: $u2 ne $u3"); + ok($u3->eq($u1), "5: $u3 ne $u1"); + ok($u3->eq($u2), "6: $u3 ne $u2"); + + # Test empty path + my $u4 = new URI::URL 'http://www.sn.no'; + ok($u4->eq("HTTP://WWW.SN.NO:80/"), "7: $u4"); + ok(!$u4->eq("http://www.sn.no:81"),"8: $u4"); + + # Test mailto +# my $u5 = new URI::URL 'mailto:AAS@SN.no'; +# ok($u5->eq('mailto:aas@sn.no'), "9: $u5"); + + + # Test reserved char + my $u6 = new URI::URL 'ftp://ftp/%2Fetc'; + ok($u6->eq("ftp://ftp/%2fetc"), "10: $u6"); + ok(!$u6->eq("ftp://ftp://etc"), "11: $u6"); +} diff --git a/t/old-file.t b/t/old-file.t new file mode 100644 index 0000000..e1ab8f5 --- /dev/null +++ b/t/old-file.t @@ -0,0 +1,81 @@ +use strict; +use warnings; + +use URI::file; +$URI::file::DEFAULT_AUTHORITY = undef; + +my @tests = ( +[ "file", "unix", "win32", "mac" ], +#---------------- ------------ --------------- -------------- +[ "file://localhost/foo/bar", + "!/foo/bar", "!\\foo\\bar", "!foo:bar", ], +[ "file:///foo/bar", + "!/foo/bar", "!\\foo\\bar", "!foo:bar", ], +[ "file:/foo/bar", "/foo/bar", "\\foo\\bar", "foo:bar", ], +[ "foo/bar", "foo/bar", "foo\\bar", ":foo:bar",], +[ "file://foo3445x/bar","!//foo3445x/bar", "\\\\foo3445x\\bar", "!foo3445x:bar"], +[ "file://a:/", "!//a:/", "!A:\\", undef], +[ "file:/", "/", "\\", undef], +[ "file://A:relative/", "!//A:relative/", "A:", undef], +[ ".", ".", ".", ":"], +[ "..", "..", "..", "::"], +[ "%2E", "!.", "!.", ":."], +[ "../%2E%2E", "!../..", "!..\\..", "::.."], +); +if ($^O eq "MacOS") { +my @extratests = ( +[ "../..", "../..", "..\\..", ":::"], +[ "../../", "../../", "..\\..\\", "!:::"], +[ "file:./foo.bar", "!./foo.bar", "!.\\foo.bar", "!:foo.bar"], +[ "file:/%2Ffoo/bar", undef, undef, "/foo:bar"], +[ "file:/.%2Ffoo/bar", undef, undef, "./foo:bar"], +[ "file:/fee/.%2Ffoo%2Fbar", undef, undef, "fee:./foo/bar"], +[ "file:/.%2Ffoo%2Fbar/", undef, undef, "./foo/bar:"], +[ "file:/.%2Ffoo%2Fbar", undef, undef, "!./foo/bar:"], +[ "file:/%2E%2E/foo", "!/../foo", "!\\..\\foo" , "..:foo"], +[ "file:/bar/%2E/foo", "!/bar/./foo", "!\\bar\\.\\foo", "bar:.:foo"], +[ "file:/foo/../bar", "/foo/../bar", "\\foo\\..\\bar", "foo::bar"], +[ "file:/a/b/../../c/d", "/a/b/../../c/d", "\\a\\b\\..\\..\\c\\d", "a:b:::c:d"], +); + push(@tests,@extratests); +} + +my @os = @{shift @tests}; +shift @os; # file + +my $num = @tests; +print "1..$num\n"; + +my $testno = 1; + +for my $t (@tests) { + my @t = @$t; + my $file = shift @t; + my $err; + + my $u = URI->new($file, "file"); + my $i = 0; + for my $os (@os) { + my $f = $u->file($os); + my $expect = $t[$i]; + $f = "" unless defined $f; + $expect = "" unless defined $expect; + my $loose; + $loose++ if $expect =~ s/^!//; + if ($expect ne $f) { + print "URI->new('$file', 'file')->file('$os') ne $expect, but $f\n"; + $err++; + } + if (defined($t[$i]) && !$loose) { + my $u2 = URI::file->new($t[$i], $os); + unless ($u2->as_string eq $file) { + print "URI::file->new('$t[$i]', '$os') ne $file, but $u2\n"; + $err++; + } + } + $i++; + } + print "not " if $err; + print "ok $testno\n"; + $testno++; +} diff --git a/t/old-relbase.t b/t/old-relbase.t new file mode 100644 index 0000000..3bd0ae8 --- /dev/null +++ b/t/old-relbase.t @@ -0,0 +1,37 @@ +use strict; +use warnings; + +print "1..5\n"; + +use URI::URL; + +# We used to have problems with URLs that used a base that was +# not absolute itself. + +my $u1 = url("/foo/bar", "http://www.acme.com/"); +my $u2 = url("../foo/", $u1); +my $u3 = url("zoo/foo", $u2); + +my $a1 = $u1->abs->as_string; +my $a2 = $u2->abs->as_string; +my $a3 = $u3->abs->as_string; + +print "$a1\n$a2\n$a3\n"; + +print "not " unless $a1 eq "http://www.acme.com/foo/bar"; +print "ok 1\n"; +print "not " unless $a2 eq "http://www.acme.com/foo/"; +print "ok 2\n"; +print "not " unless $a3 eq "http://www.acme.com/foo/zoo/foo"; +print "ok 3\n"; + +# We used to have problems with URI::URL as the base class :-( +my $u4 = url("foo", "URI::URL"); +my $a4 = $u4->abs; +print "$a4\n"; +print "not " unless $u4 eq "foo" && $a4 eq "uri:/foo"; +print "ok 4\n"; + +# Test new_abs for URI::URL objects +print "not " unless URI::URL->new_abs("foo", "http://foo/bar") eq "http://foo/foo"; +print "ok 5\n"; diff --git a/t/path-segments.t b/t/path-segments.t new file mode 100755 index 0000000..ea9b4fa --- /dev/null +++ b/t/path-segments.t @@ -0,0 +1,33 @@ +use strict; +use warnings; + +use Test::More 'no_plan'; + +use URI (); + +{ + my $u = URI->new("http://www.example.org/a/b/c"); + + is_deeply [$u->path_segments], ['', qw(a b c)], 'path_segments in list context'; + is $u->path_segments, '/a/b/c', 'path_segments in scalar context'; + + is_deeply [$u->path_segments('', qw(z y x))], ['', qw(a b c)], 'set path_segments in list context'; + is $u->path_segments('/i/j/k'), '/z/y/x', 'set path_segments in scalar context'; + + $u->path_segments('', qw(q r s)); + is $u->path_segments, '/q/r/s', 'set path_segments in void context'; +} + +{ + my $u = URI->new("http://www.example.org/abc"); + $u->path_segments('', '%', ';', '/'); + is $u->path_segments, '/%25/%3B/%2F', 'escaping special characters'; +} + +{ + my $u = URI->new("http://www.example.org/abc;param1;param2"); + my @ps = $u->path_segments; + isa_ok $ps[1], 'URI::_segment'; + $u->path_segments(@ps); + is $u->path_segments, '/abc;param1;param2', 'dealing with URI segments'; +} diff --git a/t/pop.t b/t/pop.t new file mode 100644 index 0000000..4519484 --- /dev/null +++ b/t/pop.t @@ -0,0 +1,50 @@ +use strict; +use warnings; + +print "1..8\n"; + +use URI; + +my $u = URI->new('pop://aas@pop.sn.no'); + +print "not " unless $u->user eq "aas" && + !defined($u->auth) && + $u->host eq "pop.sn.no" && + $u->port == 110 && + $u eq 'pop://aas@pop.sn.no'; +print "ok 1\n"; + +$u->auth("+APOP"); +print "not " unless $u->auth eq "+APOP" && + $u eq 'pop://aas;AUTH=+APOP@pop.sn.no'; +print "ok 2\n"; + +$u->user("gisle"); +print "not " unless $u->user eq "gisle" && + $u eq 'pop://gisle;AUTH=+APOP@pop.sn.no'; +print "ok 3\n"; + +$u->port(4000); +print "not " unless $u eq 'pop://gisle;AUTH=+APOP@pop.sn.no:4000'; +print "ok 4\n"; + +$u = URI->new("pop:"); +$u->host("pop.sn.no"); +$u->user("aas"); +$u->auth("*"); +print "not " unless $u eq 'pop://aas;AUTH=*@pop.sn.no'; +print "ok 5\n"; + +$u->auth(undef); +print "not " unless $u eq 'pop://aas@pop.sn.no'; +print "ok 6\n"; + +$u->user(undef); +print "not " unless $u eq 'pop://pop.sn.no'; +print "ok 7\n"; + +# Try some funny characters too +$u->user('får;k@l'); +print "not " unless $u->user eq 'får;k@l' && + $u eq 'pop://f%E5r%3Bk%40l@pop.sn.no'; +print "ok 8\n"; diff --git a/t/punycode.t b/t/punycode.t new file mode 100644 index 0000000..d1e3084 --- /dev/null +++ b/t/punycode.t @@ -0,0 +1,56 @@ +use strict; +use warnings; + +use utf8; +use Test::More tests => 15; +use URI::_punycode qw(encode_punycode decode_punycode); + +my %RFC_3492 = ( + A => { + unicode => udecode("u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644 u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F"), + ascii => "egbpdaj6bu4bxfgehfvwxn", + }, + B => { + unicode => udecode("u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587"), + ascii => "ihqwcrb4cv8a8dqg056pqjye", + }, + E => { + unicode => udecode("u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8 u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2 u+05D1 u+05E8 u+05D9 u+05EA"), + ascii => "4dbcagdahymbxekheh6e0a7fei0b", + }, + J => { + unicode => udecode("U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070 u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070 u+006C u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061 u+0062 u+006C u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070 u+0061 u+00F1 u+006F u+006C"), + ascii => "PorqunopuedensimplementehablarenEspaol-fmd56a", + }, + K => { + unicode => udecode("U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068 u+1EC9 u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067 U+0056 u+0069 u+1EC7 u+0074"), + ascii => "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", + }, + O => { + unicode => udecode("u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032"), + ascii => "2-u9tlzr9756bt3uc0v", + }, + S => { + unicode => "\$1.00", + ascii => "\$1.00", + }, +); + +is encode_punycode("bücher"), "bcher-kva", "http://en.wikipedia.org/wiki/Punycode example encode"; +is decode_punycode("bcher-kva"), "bücher", "http://en.wikipedia.org/wiki/Punycode example decode"; + +for my $test_key (sort keys %RFC_3492) { + my $test = $RFC_3492{$test_key}; + is encode_punycode($test->{unicode}), $test->{ascii}, "$test_key encode"; + is decode_punycode($test->{ascii}), $test->{unicode}, "$test_key decode" unless $test_key eq "S"; +} + +sub udecode { + my $str = shift; + my @u; + for (split(" ", $str)) { + /^[uU]\+[\dA-F]{2,4}$/ || die "Unexpected ucode: $_"; + push(@u, chr(hex(substr($_, 2)))); + } + return join("", @u); +} diff --git a/t/query-param.t b/t/query-param.t new file mode 100644 index 0000000..fc852c0 --- /dev/null +++ b/t/query-param.t @@ -0,0 +1,71 @@ +use strict; +use warnings; + +use Test::More tests => 19; + +use URI; +use URI::QueryParam; + +my $u = URI->new("http://www.sol.no?foo=4&bar=5&foo=5"); + +is_deeply( + $u->query_form_hash, + { foo => [ 4, 5 ], bar => 5 }, + 'query_form_hash get' +); + +$u->query_form_hash({ a => 1, b => 2}); +ok $u->query eq "a=1&b=2" || $u->query eq "b=2&a=1", 'query_form_hash set'; + +$u->query("a=1&b=2&a=3&b=4&a=5"); +is join(':', $u->query_param), "a:b", 'query_param list keys'; + +is $u->query_param("a"), "1", "query_param scalar return"; +is join(":", $u->query_param("a")), "1:3:5", "query_param list return"; + +is $u->query_param(a => 11 .. 15), 1, "query_param set return"; + +is $u->query, "a=11&b=2&a=12&b=4&a=13&a=14&a=15", "param order"; + +is join(":", $u->query_param(a => 11)), "11:12:13:14:15", "old values returned"; + +is $u->query, "a=11&b=2&b=4"; + +is $u->query_param_delete("a"), "11", 'query_param_delete'; + +is $u->query, "b=2&b=4"; + +$u->query_param_append(a => 1, 3, 5); +$u->query_param_append(b => 6); + +is $u->query, "b=2&b=4&a=1&a=3&a=5&b=6"; + +$u->query_param(a => []); # same as $u->query_param_delete("a"); + +is $u->query, "b=2&b=4&b=6", 'delete by assigning empty list'; + +$u->query(undef); +$u->query_param(a => 1, 2, 3); +$u->query_param(b => 1); + +is $u->query, 'a=1&a=2&a=3&b=1', 'query_param from scratch'; + +$u->query_param_delete('a'); +$u->query_param_delete('b'); + +ok ! $u->query; + +is $u->as_string, 'http://www.sol.no'; + +$u->query(undef); +$u->query_param(a => 1, 2, 3); +$u->query_param(b => 1); + +is $u->query, 'a=1&a=2&a=3&b=1'; + +$u->query_param('a' => []); +$u->query_param('b' => []); + +ok ! $u->query; + +is $u->as_string, 'http://www.sol.no'; diff --git a/t/query.t b/t/query.t new file mode 100644 index 0000000..2970814 --- /dev/null +++ b/t/query.t @@ -0,0 +1,81 @@ +use strict; +use warnings; + +use Test::More tests => 23; + +use URI (); +my $u = URI->new("", "http"); +my @q; + +$u->query_form(a => 3, b => 4); +is $u, "?a=3&b=4"; + +$u->query_form(a => undef); +is $u, "?a="; + +$u->query_form("a[=&+#] " => " [=&+#]"); +is $u, "?a%5B%3D%26%2B%23%5D+=+%5B%3D%26%2B%23%5D"; + +@q = $u->query_form; +is join(":", @q), "a[=&+#] : [=&+#]"; + +@q = $u->query_keywords; +ok !@q; + +$u->query_keywords("a", "b"); +is $u, "?a+b"; + +$u->query_keywords(" ", "+", "=", "[", "]"); +is $u, "?%20+%2B+%3D+%5B+%5D"; + +@q = $u->query_keywords; +is join(":", @q), " :+:=:[:]"; + +@q = $u->query_form; +ok !@q; + +$u->query(" +?=#"); +is $u, "?%20+?=%23"; + +$u->query_keywords([qw(a b)]); +is $u, "?a+b"; + +$u->query_keywords([]); +is $u, ""; + +$u->query_form({ a => 1, b => 2 }); +ok $u eq "?a=1&b=2" || $u eq "?b=2&a=1"; + +$u->query_form([ a => 1, b => 2 ]); +is $u, "?a=1&b=2"; + +$u->query_form({}); +is $u, ""; + +$u->query_form([a => [1..4]]); +is $u, "?a=1&a=2&a=3&a=4"; + +$u->query_form([]); +is $u, ""; + +$u->query_form(a => { foo => 1 }); +ok "$u" =~ /^\?a=HASH\(/; + +$u->query_form(a => 1, b => 2, ';'); +is $u, "?a=1;b=2"; + +$u->query_form(a => 1, c => 2); +is $u, "?a=1;c=2"; + +$u->query_form(a => 1, c => 2, '&'); +is $u, "?a=1&c=2"; + +$u->query_form([a => 1, b => 2], ';'); +is $u, "?a=1;b=2"; + +$u->query_form([]); +{ + local $URI::DEFAULT_QUERY_FORM_DELIMITER = ';'; + $u->query_form(a => 1, b => 2); +} +is $u, "?a=1;b=2"; diff --git a/t/rel.t b/t/rel.t new file mode 100644 index 0000000..104ae5d --- /dev/null +++ b/t/rel.t @@ -0,0 +1,21 @@ +use strict; +use warnings; + +use Test::More; + +plan tests => 6; + +use URI; + +my $uri; + +$uri = URI->new("http://www.example.com/foo/bar/"); +is($uri->rel("http://www.example.com/foo/bar/"), "./"); +is($uri->rel("HTTP://WWW.EXAMPLE.COM/foo/bar/"), "./"); +is($uri->rel("HTTP://WWW.EXAMPLE.COM/FOO/BAR/"), "../../foo/bar/"); +is($uri->rel("HTTP://WWW.EXAMPLE.COM:80/foo/bar/"), "./"); + +$uri = URI->new("http://www.example.com/foo/bar"); +is($uri->rel("http://www.example.com/foo/bar"), "bar"); +is($uri->rel("http://www.example.com/foo"), "foo/bar"); + diff --git a/t/rfc2732.t b/t/rfc2732.t new file mode 100644 index 0000000..d69960a --- /dev/null +++ b/t/rfc2732.t @@ -0,0 +1,59 @@ +# Test URIs containing IPv6 addresses + +use strict; +use warnings; + +use Test::More tests => 19; + +use URI; +my $uri = URI->new("http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"); + +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"; +is $uri->host, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210"; +is $uri->host_port, "[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80"; +is $uri->port, "80"; + +$uri->port(undef); +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]/index.html"; +is $uri->host_port, "[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80"; +$uri->port(80); + +$uri->host("host"); +is $uri->as_string, "http://host:80/index.html"; + +$uri->host("FEDC:BA98:7654:3210:FEDC:BA98:7654:3210"); +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html"; +$uri->host_port("[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:88"); +is $uri->as_string, "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:88/index.html"; +$uri->host_port("[::1]:80"); +is $uri->as_string, "http://[::1]:80/index.html"; +$uri->host("::1:80"); +is $uri->as_string, "http://[::1:80]:80/index.html"; +$uri->host("[::1:80]"); +is $uri->as_string, "http://[::1:80]:80/index.html"; +$uri->host("[::1]:88"); +is $uri->as_string, "http://[::1]:88/index.html"; + + +$uri = URI->new("ftp://ftp:@[3ffe:2a00:100:7031::1]"); +is $uri->as_string, "ftp://ftp:@[3ffe:2a00:100:7031::1]"; + +is $uri->port, "21"; +ok !$uri->_port; + +is $uri->host("ftp"), "3ffe:2a00:100:7031::1"; + +is $uri, "ftp://ftp:\@ftp"; + +$uri = URI->new("http://[::1]"); +is $uri->host, "::1"; + +__END__ + + http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html + http://[1080:0:0:0:8:800:200C:417A]/index.html + http://[3ffe:2a00:100:7031::1] + http://[1080::8:800:200C:417A]/foo + http://[::192.9.5.5]/ipng + http://[::FFFF:129.144.52.38]:80/index.html + http://[2010:836B:4179::836B:4179] diff --git a/t/roy-test.t b/t/roy-test.t new file mode 100644 index 0000000..a7a9fdc --- /dev/null +++ b/t/roy-test.t @@ -0,0 +1,44 @@ +use strict; +use warnings; + +use Test qw(plan ok); +plan tests => 102; + +use URI; +use File::Spec::Functions qw(catfile); + +my $no = 1; + +my @prefix; +push(@prefix, "t") if -d "t"; + +for my $i (1..5) { + my $file = catfile(@prefix, "roytest$i.html"); + + open(FILE, $file) || die "Can't open $file: $!"; + print "# $file\n"; + my $base = undef; + while () { + if (/^/) { + $base = URI->new($1); + } elsif (/^.*<\/a>\s*=\s*(\S+)/) { + die "Missing base at line $." unless $base; + my $link = $1; + my $exp = $2; + $exp = $base if $exp =~ /current/; # special case test 22 + + # rfc2396bis restores the rfc1808 behaviour + if ($no == 7) { + $exp = "http://a/b/c/d;p?y"; + } + elsif ($no == 48) { + $exp = "http://a/b/c/d;p?y"; + } + + ok(URI->new($link)->abs($base), $exp); + + $no++; + } + } + close(FILE); +} diff --git a/t/roytest1.html b/t/roytest1.html new file mode 100644 index 0000000..95fedbe --- /dev/null +++ b/t/roytest1.html @@ -0,0 +1,194 @@ + +Examples of Resolving Relative URLs + + +

Examples of Resolving Relative URLs

+ +This document has an embedded base URL of +
+   Content-Base: http://a/b/c/d;p?q
+
+the relative URLs should be resolved as shown below. +

+I will need your help testing the examples on multiple browsers. +What you need to do is point to the example anchor and compare it to the +resolved URL in your browser (most browsers have a feature by which you +can see the resolved URL at the bottom of the window/screen when the anchor +is active). + +

Tested Clients and Client Libraries

+ +
+
[R] +
RFC 2396 (the right way to parse) +
[X] +
RFC 1808 +
[1] +
Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +
[2] +
Lynx/2.7.1 libwww-FM/2.14 +
[3] +
MSIE 3.01; Windows 95 +
[4] +
NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) libwww/2.12 +
[5] +
libwww-perl/5.14 [Martijn Koster] +
+ +

Normal Examples

+
+              RESULTS                     from
+ 
+g:h        =  g:h                         [R,X,2,3,4,5]
+              http://a/b/c/g:h            [1]
+
+g          =  http://a/b/c/g              [R,X,1,2,3,4,5]
+
+./g        =  http://a/b/c/g              [R,X,1,2,3,4,5]
+
+g/         =  http://a/b/c/g/             [R,X,1,2,3,4,5]
+
+/g         =  http://a/g                  [R,X,1,2,3,4,5]
+
+//g        =  http://g                    [R,X,1,2,3,4,5]
+
+?y         =  http://a/b/c/?y             [R,1,2,3,4]
+              http://a/b/c/d;p?y          [X,5]
+
+g?y        =  http://a/b/c/g?y            [R,X,1,2,3,4,5]
+
+#s         =  (current document)#s        [R,2,4]
+              http://a/b/c/d;p?q#s        [X,1,3,5]
+
+g#s        =  http://a/b/c/g#s            [R,X,1,2,3,4,5]
+
+g?y#s      =  http://a/b/c/g?y#s          [R,X,1,2,3,4,5]
+
+;x         =  http://a/b/c/;x             [R,1,2,3,4]
+              http://a/b/c/d;x            [X,5]
+
+g;x        =  http://a/b/c/g;x            [R,X,1,2,3,4,5]
+
+g;x?y#s    =  http://a/b/c/g;x?y#s        [R,X,1,2,3,4,5]
+
+.          =  http://a/b/c/               [R,X,2,5]
+              http://a/b/c/.              [1]
+              http://a/b/c                [3,4]
+
+./         =  http://a/b/c/               [R,X,1,2,3,4,5]
+
+..         =  http://a/b/                 [R,X,2,5]
+              http://a/b                  [1,3,4]
+
+../        =  http://a/b/                 [R,X,1,2,3,4,5]
+
+../g       =  http://a/b/g                [R,X,1,2,3,4,5]
+
+../..      =  http://a/                   [R,X,2,5]
+              http://a                    [1,3,4]
+
+../../     =  http://a/                   [R,X,1,2,3,4,5]
+
+../../g    =  http://a/g                  [R,X,1,2,3,4,5]
+
+ +

Abnormal Examples

+ +Although the following abnormal examples are unlikely to occur in +normal practice, all URL parsers should be capable of resolving them +consistently. Each example uses the same base as above.

+ +An empty reference refers to the start of the current document. +

+<>         =  (current document)          [R,2,4]
+              http://a/b/c/d;p?q          [X,3,5]
+              http://a/b/c/               [1]
+
+Parsers must be careful in handling the case where there are more +relative path ".." segments than there are hierarchical levels in the +base URL's path. Note that the ".." syntax cannot be used to change +the site component of a URL. +
+../../../g    =  http://a/../g            [R,X,2,4,5]
+                 http://a/g               [R,1,3]
+
+../../../../g =  http://a/../../g         [R,X,2,4,5]
+                 http://a/g               [R,1,3]
+
+In practice, some implementations strip leading relative symbolic +elements (".", "..") after applying a relative URL calculation, based +on the theory that compensating for obvious author errors is better +than allowing the request to fail. Thus, the above two references +will be interpreted as "http://a/g" by some implementations. +

+Similarly, parsers must avoid treating "." and ".." as special when +they are not complete components of a relative path. +

+/./g      =  http://a/./g                 [R,X,2,3,4,5]
+             http://a/g                   [1]
+
+/../g     =  http://a/../g                [R,X,2,3,4,5]
+             http://a/g                   [1]
+
+g.        =  http://a/b/c/g.              [R,X,1,2,3,4,5]
+
+.g        =  http://a/b/c/.g              [R,X,1,2,3,4,5]
+
+g..       =  http://a/b/c/g..             [R,X,1,2,3,4,5]
+
+..g       =  http://a/b/c/..g             [R,X,1,2,3,4,5]
+
+Less likely are cases where the relative URL uses unnecessary or +nonsensical forms of the "." and ".." complete path segments. +
+./../g     =  http://a/b/g                [R,X,1,2,5]
+              http://a/b/c/../g           [3,4]
+
+./g/.      =  http://a/b/c/g/             [R,X,2,5]
+              http://a/b/c/g/.            [1]
+              http://a/b/c/g              [3,4]
+
+g/./h      =  http://a/b/c/g/h            [R,X,1,2,3,4,5]
+
+g/../h     =  http://a/b/c/h              [R,X,1,2,3,4,5]
+
+g;x=1/./y  =  http://a/b/c/g;x=1/y        [R,1,2,3,4]
+              http://a/b/c/g;x=1/./y      [X,5]
+
+g;x=1/../y =  http://a/b/c/y              [R,1,2,3,4]
+              http://a/b/c/g;x=1/../y     [X,5]
+
+
+All client applications remove the query component from the base URL +before resolving relative URLs. However, some applications fail to +separate the reference's query and/or fragment components from a +relative path before merging it with the base path. This error is +rarely noticed, since typical usage of a fragment never includes the +hierarchy ("/") character, and the query component is not normally +used within relative references. +
+g?y/./x    =  http://a/b/c/g?y/./x        [R,X,5]
+              http://a/b/c/g?y/x          [1,2,3,4]
+
+g?y/../x   =  http://a/b/c/g?y/../x       [R,X,5]
+              http://a/b/c/x              [1,2,3,4]
+
+g#s/./x    =  http://a/b/c/g#s/./x        [R,X,2,3,4,5]
+              http://a/b/c/g#s/x          [1]
+
+g#s/../x   =  http://a/b/c/g#s/../x       [R,X,2,3,4,5]
+              http://a/b/c/x              [1]
+
+ Some parsers allow the scheme name to be present in a relative URI if + it is the same as the base URI scheme. This is considered to be a + loophole in prior specifications of partial URI [RFC1630]. Its use + should be avoided. +
+http:g    =  http:g                       [R,X,5]
+          |  http://a/b/c/g               [1,2,3,4]  (ok for compat.)
+
+http:     =  http:                        [R,X,5]
+             http://a/b/c/                [1]
+             http://a/b/c/d;p?q           [2,3,4]
+
+ diff --git a/t/roytest2.html b/t/roytest2.html new file mode 100644 index 0000000..3906f4e --- /dev/null +++ b/t/roytest2.html @@ -0,0 +1,100 @@ + +Examples of Resolving Relative URLs, Part 2 + + +

Examples of Resolving Relative URLs, Part 2

+ +This document has an embedded base URL of +
+   Content-Base: http://a/b/c/d;p?q=1/2
+
+the relative URLs should be resolved as shown below. In this test page, +I am particularly interested in testing whether "/" in query information +is or is not treated as part of the path hierarchy. +

+I will need your help testing the examples on multiple browsers. +What you need to do is point to the example anchor and compare it to the +resolved URL in your browser (most browsers have a feature by which you +can see the resolved URL at the bottom of the window/screen when the anchor +is active). + +

Tested Clients and Client Libraries

+ +
+
[R] +
RFC 2396 (the right way to parse) +
[X] +
RFC 1808 +
[1] +
Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +
[2] +
Lynx/2.7.1 libwww-FM/2.14 +
[3] +
MSIE 3.01; Windows 95 +
[4] +
NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) libwww/2.12 +
+ +

Synopsis

+ +RFC 1808 specified that the "/" character within query information +does not affect the hierarchy within URL parsing. It would appear that +it does in current practice, but only within the relative path after +it is attached to the base path. In other words, the base URL's query +information is being stripped off before any relative resolution, but +some parsers fail to separate the query information from the relative +path.

+ +We have decided that this behavior is due to an oversight in the original +libwww implementation, and it is better to correct the oversight in future +parsers than it is to make a nonsensical standard. A note has been added +to the URI draft to account for the differences in implementations. This should +have no impact on current practice since unescaped "/" is rarely (if ever) +used within the query part of a URL, and query parts themselves are rarely +used with relative URLs. + +

Examples

+
+              RESULTS                     from
+ 
+g          =  http://a/b/c/g              [R,X,1,2,3,4]
+
+./g        =  http://a/b/c/g              [R,X,1,2,3,4]
+
+g/         =  http://a/b/c/g/             [R,X,1,2,3,4]
+
+/g         =  http://a/g                  [R,X,1,2,3,4]
+
+//g        =  http://g                    [R,X,1,2,3,4]
+
+?y         =  http://a/b/c/?y             [R,1,2,3,4]
+              http://a/b/c/d;p?y          [X]
+
+g?y        =  http://a/b/c/g?y            [R,X,1,2,3,4]
+
+g?y/./x    =  http://a/b/c/g?y/./x        [R,X]
+              http://a/b/c/g?y/x          [1,2,3,4]
+
+g?y/../x   =  http://a/b/c/g?y/../x       [R,X]
+              http://a/b/c/x              [1,2,3,4]
+
+g#s        =  http://a/b/c/g#s            [R,X,1,2,3,4]
+
+g#s/./x    =  http://a/b/c/g#s/./x        [R,X,2,3,4]
+              http://a/b/c/g#s/x          [1]
+
+g#s/../x   =  http://a/b/c/g#s/../x       [R,X,2,3,4]
+              http://a/b/c/x              [1]
+
+./         =  http://a/b/c/               [R,X,1,2,3,4]
+
+../        =  http://a/b/                 [R,X,1,2,3,4]
+
+../g       =  http://a/b/g                [R,X,1,2,3,4]
+
+../../     =  http://a/                   [R,X,1,2,3,4]
+
+../../g    =  http://a/g                  [R,X,1,2,3,4]
+
+
+ diff --git a/t/roytest3.html b/t/roytest3.html new file mode 100644 index 0000000..699558f --- /dev/null +++ b/t/roytest3.html @@ -0,0 +1,89 @@ + +Examples of Resolving Relative URLs, Part 3 + + +

Examples of Resolving Relative URLs, Part 3

+ +This document has an embedded base URL of +
+   Content-Base: http://a/b/c/d;p=1/2?q
+
+the relative URLs should be resolved as shown below. For this test page, +I am particularly interested in testing whether "/" in parameters is or is not +treated as part of the path hierarchy. +

+I will need your help testing the examples on multiple browsers. +What you need to do is point to the example anchor and compare it to the +resolved URL in your browser (most browsers have a feature by which you +can see the resolved URL at the bottom of the window/screen when the anchor +is active). + +

Tested Clients and Client Libraries

+ +
+
[R] +
RFC 2396 (the right way to parse) +
[X] +
RFC 1808 +
[1] +
Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +
[2] +
Lynx/2.7.1 libwww-FM/2.14 +
[3] +
MSIE 3.01; Windows 95 +
[4] +
NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) libwww/2.12 +
+ +

Synopsis

+ +RFC 1808 specified that the "/" character within parameter information +does not affect the hierarchy within URL parsing. It would appear that +it does in current practice. This implies that the parameters should +be part of each path segment and not outside the path. The URI draft has +been written accordingly. + +

Examples

+
+              RESULTS                     from
+
+g          =  http://a/b/c/d;p=1/g        [R,1,2,3,4]
+              http://a/b/c/g              [X]
+
+./g        =  http://a/b/c/d;p=1/g        [R,1,2,3,4]
+              http://a/b/c/g              [X]
+
+g/         =  http://a/b/c/d;p=1/g/       [R,1,2,3,4]
+              http://a/b/c/g/             [X]
+
+g?y        =  http://a/b/c/d;p=1/g?y      [R,1,2,3,4]
+              http://a/b/c/g?y            [X]
+
+;x         =  http://a/b/c/d;p=1/;x       [R,1,2,3,4]
+              http://a/b/c/d;x            [X]
+
+g;x        =  http://a/b/c/d;p=1/g;x      [R,1,2,3,4]
+              http://a/b/c/g;x            [X]
+
+g;x=1/./y  =  http://a/b/c/d;p=1/g;x=1/y  [R,1,2,3,4]
+              http://a/b/c/g;x=1/./y      [X]
+
+g;x=1/../y =  http://a/b/c/d;p=1/y        [R,1,2,3,4]
+              http://a/b/c/g;x=1/../y     [X]
+
+./         =  http://a/b/c/d;p=1/         [R,1,2,3,4]
+              http://a/b/c/               [X]
+
+../        =  http://a/b/c/               [R,1,2,3,4]
+              http://a/b/                 [X]
+
+../g       =  http://a/b/c/g              [R,1,2,3,4]
+              http://a/b/g                [X]
+
+../../     =  http://a/b/                 [R,1,2,3,4]
+              http://a/                   [X]
+
+../../g    =  http://a/b/g                [R,1,2,3,4]
+              http://a/g                  [X]
+
+ diff --git a/t/roytest4.html b/t/roytest4.html new file mode 100644 index 0000000..160554c --- /dev/null +++ b/t/roytest4.html @@ -0,0 +1,98 @@ + +Examples of Resolving Relative URLs, Part 4 + + +

Examples of Resolving Relative URLs, Part 4

+ +This document has an embedded base URL of +
+   Content-Base: fred:///s//a/b/c
+
+in order to test a notion that Tim Berners-Lee mentioned regarding +the ability of URIs to have a triple-slash (or even more slashes) +to indicate higher levels of hierarchy than those already used by URLs. + +

Tested Clients and Client Libraries

+ +
+
[R] +
RFC 2396 (the right way to parse) +
Tim +
Tim Berners-Lee's proposed interpretation +
[1] +
Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +
[2] +
Lynx/2.7.1 libwww-FM/2.14 +
[3] +
MSIE 3.01; Windows 95 +
[4] +
NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) +
+ +

Synopsis

+ +RFC 1808 specified that the highest level for relative URLs is indicated +by a double-slash "//", and therefore that any triple-slash would be +considered a null site component, rather than a higher-level component +than the site component (as proposed by Tim).

+ +The URI draft assumes that a triple-slash means an empty site component. +Netscape Navigator behaves irrationally, apparently because their parser +is scheme-dependent and therefore doesn't do the hierarchical parsing that +would be expected. Oddly, Lynx seems to straddle both sides. + +

Examples

+
+                  RESULTS                       from
+
+g:h            =  g:h                           [R,Tim,2,3]
+                  fred:///s//a/b/g:h            [1]
+
+g              =  fred:///s//a/b/g              [R,Tim,1,2,3]
+
+./g            =  fred:///s//a/b/g              [R,Tim,2,3]
+                  fred:///s//a/b/./g            [1]
+
+g/             =  fred:///s//a/b/g/             [R,Tim,1,2,3]
+
+/g             =  fred:///g                     [R,1,2,3]
+                  fred:///s//a/g                [Tim]
+
+//g            =  fred://g                      [R,1,2,3]
+                  fred:///s//g                  [Tim]
+
+//g/x          =  fred://g/x                    [R,1,2,3]
+                  fred:///s//g/x                [Tim]
+
+///g           =  fred:///g                     [R,Tim,1,2,3]
+
+./             =  fred:///s//a/b/               [R,Tim,2,3]
+                  fred:///s//a/b/./             [1]
+
+../            =  fred:///s//a/                 [R,Tim,2,3]
+                  fred:///s//a/b/../            [1]
+
+../g           =  fred:///s//a/g                [R,Tim,2,3]
+                  fred:///s//a/b/../g           [1]
+
+../../         =  fred:///s//                   [R]
+                  fred:///s//a/../              [Tim,2]
+                  fred:///s//a/b/../../         [1]
+                  fred:///s//a/                 [3]
+
+../../g        =  fred:///s//g                  [R]
+                  fred:///s//a/../g             [Tim,2]
+                  fred:///s//a/b/../../g        [1]
+                  fred:///s//a/g                [3]
+
+../../../g     =  fred:///s/g                   [R]
+                  fred:///s//a/../../g          [Tim,2]
+                  fred:///s//a/b/../../../g     [1]
+                  fred:///s//a/g                [3]
+
+../../../../g  =  fred:///g                     [R]
+                  fred:///s//a/../../../g       [Tim,2]
+                  fred:///s//a/b/../../../../g  [1]
+                  fred:///s//a/g                [3]
+
+ diff --git a/t/roytest5.html b/t/roytest5.html new file mode 100644 index 0000000..1b24361 --- /dev/null +++ b/t/roytest5.html @@ -0,0 +1,92 @@ + +Examples of Resolving Relative URLs, Part 5 + + +

Examples of Resolving Relative URLs, Part 5

+ +This document has an embedded base URL of +
+   Content-Base: http:///s//a/b/c
+
+in order to test a notion that Tim Berners-Lee mentioned regarding +the ability of URIs to have a triple-slash (or even more slashes) +to indicate higher levels of hierarchy than those already used by URLs. +This is the same as Part 4, except that the scheme "fred" is replaced +with "http" for clients that stupidly change their parsing behavior +based on the scheme name. + +

Tested Clients and Client Libraries

+ +
+
[R] +
RFC 2396 (the right way to parse) +
Tim +
Tim Berners-Lee's proposed interpretation +
[1] +
Mozilla/4.03 [en] (X11; U; SunOS 5.5 sun4u; Nav) +
[2] +
Lynx/2.7.1 libwww-FM/2.14 +
[3] +
MSIE 3.01; Windows 95 +
[4] +
NCSA_Mosaic/2.6 (X11;SunOS 4.1.2 sun4m) +
+ +

Synopsis

+ +RFC 1808 specified that the highest level for relative URLs is indicated +by a double-slash "//", and therefore that any triple-slash would be +considered a null site component, rather than a higher-level component +than the site component (as proposed by Tim).

+ +Draft 09 assumes that a triple-slash means an empty site component, +as does Netscape Navigator if the scheme is known. +Oddly, Lynx seems to straddle both sides. + +

Examples

+
+                  RESULTS                       from
+
+g:h            =  g:h                           [R,Tim,2,3]
+                  http:///s//a/b/g:h            [1]
+
+g              =  http:///s//a/b/g              [R,Tim,1,2,3]
+
+./g            =  http:///s//a/b/g              [R,Tim,1,2,3]
+
+g/             =  http:///s//a/b/g/             [R,Tim,1,2,3]
+
+/g             =  http:///g                     [R,1,2,3]
+                  http:///s//a/g                [Tim]
+
+//g            =  http://g                      [R,1,2,3]
+                  http:///s//g                  [Tim]
+
+//g/x          =  http://g/x                    [R,1,2,3]
+                  http:///s//g/x                [Tim]
+
+///g           =  http:///g                     [R,Tim,1,2,3]
+
+./             =  http:///s//a/b/               [R,Tim,1,2,3]
+
+../            =  http:///s//a/                 [R,Tim,1,2,3]
+
+../g           =  http:///s//a/g                [R,Tim,1,2,3]
+
+../../         =  http:///s//                   [R,1]
+                  http:///s//a/../              [Tim,2]
+                  http:///s//a/                 [3]
+
+../../g        =  http:///s//g                  [R,1]
+                  http:///s//a/../g             [Tim,2]
+                  http:///s//a/g                [3]
+
+../../../g     =  http:///s/g                   [R,1]
+                  http:///s//a/../../g          [Tim,2]
+                  http:///s//a/g                [3]
+
+../../../../g  =  http:///g                     [R,1]
+                  http:///s//a/../../../g       [Tim,2]
+                  http:///s//a/g                [3]
+
+ diff --git a/t/rsync.t b/t/rsync.t new file mode 100644 index 0000000..01e91d7 --- /dev/null +++ b/t/rsync.t @@ -0,0 +1,23 @@ +use strict; +use warnings; + +print "1..4\n"; + +use URI; + +my $u = URI->new('rsync://gisle@perl.com/foo/bar'); + +print "not " unless $u->user eq "gisle"; +print "ok 1\n"; + +print "not " unless $u->port eq 873; +print "ok 2\n"; + +print "not " unless $u->path eq "/foo/bar"; +print "ok 3\n"; + +$u->port(8730); + +print "not " unless $u eq 'rsync://gisle@perl.com:8730/foo/bar'; +print "ok 4\n"; + diff --git a/t/rtsp.t b/t/rtsp.t new file mode 100644 index 0000000..208b63b --- /dev/null +++ b/t/rtsp.t @@ -0,0 +1,43 @@ +use strict; +use warnings; + +print "1..9\n"; + +use URI; + +my $u = URI->new(""); + +#print "$u\n"; +print "not " unless $u eq "rtsp://media.perl.com/f%F4o.smi/"; +print "ok 1\n"; + +print "not " unless $u->port == 554; +print "ok 2\n"; + +# play with port +my $old = $u->port(8554); +print "not " unless $old == 554 && $u eq "rtsp://media.perl.com:8554/f%F4o.smi/"; +print "ok 3\n"; + +$u->port(554); +print "not " unless $u eq "rtsp://media.perl.com:554/f%F4o.smi/"; +print "ok 4\n"; + +$u->port(""); +print "not " unless $u eq "rtsp://media.perl.com:/f%F4o.smi/" && $u->port == 554; +print "ok 5\n"; + +$u->port(undef); +print "not " unless $u eq "rtsp://media.perl.com/f%F4o.smi/"; +print "ok 6\n"; + +print "not " unless $u->host eq "media.perl.com"; +print "ok 7\n"; + +print "not " unless $u->path eq "/f%F4o.smi/"; +print "ok 8\n"; + +$u->scheme("rtspu"); +print "not " unless $u->scheme eq "rtspu"; +print "ok 9\n"; + diff --git a/t/sip.t b/t/sip.t new file mode 100644 index 0000000..506bba3 --- /dev/null +++ b/t/sip.t @@ -0,0 +1,69 @@ +use strict; +use warnings; + +print "1..11\n"; + +use URI; + +my $u = URI->new('sip:phone@domain.ext'); +print "not " unless $u->user eq 'phone' && + $u->host eq 'domain.ext' && + $u->port eq '5060' && + $u eq 'sip:phone@domain.ext'; +print "ok 1\n"; + +$u->host_port('otherdomain.int:9999'); +print "not " unless $u->host eq 'otherdomain.int' && + $u->port eq '9999' && + $u eq 'sip:phone@otherdomain.int:9999'; +print "ok 2\n"; + +$u->port('5060'); +$u = $u->canonical; +print "not " unless $u->host eq 'otherdomain.int' && + $u->port eq '5060' && + $u eq 'sip:phone@otherdomain.int'; +print "ok 3\n"; + +$u->user('voicemail'); +print "not " unless $u->user eq 'voicemail' && + $u eq 'sip:voicemail@otherdomain.int'; +print "ok 4\n"; + +$u = URI->new('sip:phone@domain.ext?Subject=Meeting&Priority=Urgent'); +print "not " unless $u->host eq 'domain.ext' && + $u->query eq 'Subject=Meeting&Priority=Urgent'; +print "ok 5\n"; + +$u->query_form(Subject => 'Lunch', Priority => 'Low'); +my @q = $u->query_form; +print "not " unless $u->host eq 'domain.ext' && + $u->query eq 'Subject=Lunch&Priority=Low' && + @q == 4 && "@q" eq "Subject Lunch Priority Low"; +print "ok 6\n"; + +$u = URI->new('sip:phone@domain.ext;maddr=127.0.0.1;ttl=16'); +print "not " unless $u->host eq 'domain.ext' && + $u->params eq 'maddr=127.0.0.1;ttl=16'; +print "ok 7\n"; + +$u = URI->new('sip:phone@domain.ext?Subject=Meeting&Priority=Urgent'); +$u->params_form(maddr => '127.0.0.1', ttl => '16'); +my @p = $u->params_form; +print "not " unless $u->host eq 'domain.ext' && + $u->query eq 'Subject=Meeting&Priority=Urgent' && + $u->params eq 'maddr=127.0.0.1;ttl=16' && + @p == 4 && "@p" eq "maddr 127.0.0.1 ttl 16"; + +print "ok 8\n"; + +$u = URI->new_abs('sip:phone@domain.ext', 'sip:foo@domain2.ext'); +print "not " unless $u eq 'sip:phone@domain.ext'; +print "ok 9\n"; + +$u = URI->new('sip:phone@domain.ext'); +print "not " unless $u eq $u->abs('http://www.cpan.org/'); +print "ok 10\n"; + +print "not " unless $u eq $u->rel('http://www.cpan.org/'); +print "ok 11\n"; diff --git a/t/sort-hash-query-form.t b/t/sort-hash-query-form.t new file mode 100644 index 0000000..7c6f896 --- /dev/null +++ b/t/sort-hash-query-form.t @@ -0,0 +1,22 @@ +use strict; +use warnings; + +use Test::More; + +# ABSTRACT: Make sure query_form(\%hash) is sorted + +use URI; + +my $base = URI->new('http://example.org/'); + +my $i = 1; + +my $hash = { map { $_ => $i++ } qw( a b c d e f ) }; + +$base->query_form($hash); + +is("$base","http://example.org/?a=1&b=2&c=3&d=4&e=5&f=6", "Query parameters are sorted"); + +done_testing; + + diff --git a/t/split.t b/t/split.t new file mode 100644 index 0000000..34104b8 --- /dev/null +++ b/t/split.t @@ -0,0 +1,59 @@ +use strict; +use warnings; + +print "1..17\n"; + +use URI::Split qw(uri_split uri_join); + +sub j { join("-", map { defined($_) ? $_ : "" } @_) } + +print "not " unless j(uri_split("p")) eq "--p--"; +print "ok 1\n"; + +print "not " unless j(uri_split("p?q")) eq "--p-q-"; +print "ok 2\n"; + +print "not " unless j(uri_split("p#f")) eq "--p--f"; +print "ok 3\n"; + +print "not " unless j(uri_split("p?q/#f/?")) eq "--p-q/-f/?"; +print "ok 4\n"; + +print "not " unless j(uri_split("s://a/p?q#f")) eq "s-a-/p-q-f"; +print "ok 5\n"; + +print "not " unless uri_join("s", "a", "/p", "q", "f") eq "s://a/p?q#f"; +print "ok 6\n"; + +print "not " unless uri_join("s", "a", "p", "q", "f") eq "s://a/p?q#f"; +print "ok 7\n"; + +print "not " unless uri_join(undef, undef, "", undef, undef) eq ""; +print "ok 8\n"; + +print "not " unless uri_join(undef, undef, "p", undef, undef) eq "p"; +print "ok 9\n"; + +print "not " unless uri_join("s", undef, "p") eq "s:p"; +print "ok 10\n"; + +print "not " unless uri_join("s") eq "s:"; +print "ok 11\n"; + +print "not " unless uri_join() eq ""; +print "ok 12\n"; + +print "not " unless uri_join("s", "a") eq "s://a"; +print "ok 13\n"; + +print "not " unless uri_join("s", "a/b") eq "s://a%2Fb"; +print "ok 14\n"; + +print "not " unless uri_join("s", ":/?#", ":/?#", ":/?#", ":/?#") eq "s://:%2F%3F%23/:/%3F%23?:/?%23#:/?#"; +print "ok 15\n"; + +print "not " unless uri_join(undef, undef, "a:b") eq "a%3Ab"; +print "ok 16\n"; + +print "not " unless uri_join("s", undef, "//foo//bar") eq "s:////foo//bar"; +print "ok 17\n"; diff --git a/t/storable-test.pl b/t/storable-test.pl new file mode 100644 index 0000000..33deb6f --- /dev/null +++ b/t/storable-test.pl @@ -0,0 +1,27 @@ +use strict; +use warnings; +use Storable; + +if (@ARGV && $ARGV[0] eq "store") { + require URI; + require URI::URL; + my $a = { + u => new URI('http://search.cpan.org/'), + }; + print "# store\n"; + store [URI->new("http://search.cpan.org")], 'urls.sto'; +} else { + print "# retrieve\n"; + my $a = retrieve 'urls.sto'; + my $u = $a->[0]; + #use Data::Dumper; print Dumper($a); + + print "not " unless $u eq "http://search.cpan.org"; + print "ok 1\n"; + + print "not " unless $u->scheme eq "http"; + print "ok 2\n"; + + print "not " unless ref($u) eq "URI::http"; + print "ok 3\n"; +} diff --git a/t/storable.t b/t/storable.t new file mode 100644 index 0000000..cf6e65a --- /dev/null +++ b/t/storable.t @@ -0,0 +1,16 @@ +use strict; +use warnings; + +eval { + require Storable; + print "1..3\n"; +}; +if ($@) { + print "1..0 # skipped: Needs the Storable module installed\n"; + exit; +} + +system($^X, "-Iblib/lib", "t/storable-test.pl", "store"); +system($^X, "-Iblib/lib", "t/storable-test.pl", "retrieve"); + +unlink('urls.sto'); diff --git a/t/urn-isbn.t b/t/urn-isbn.t new file mode 100644 index 0000000..d8985f7 --- /dev/null +++ b/t/urn-isbn.t @@ -0,0 +1,62 @@ +use strict; +use warnings; + +eval { + require Business::ISBN; +}; +if ($@) { + print "1..0 # Skipped: Needs the Business::ISBN module installed\n\n"; + print $@; + exit; +} + +print "1..13\n"; + +use URI; +my $u = URI->new("URN:ISBN:0395363411"); + +print "not " unless $u eq "URN:ISBN:0395363411" && + $u->scheme eq "urn" && + $u->nid eq "isbn"; +print "ok 1\n"; + +print "not " unless $u->canonical eq "urn:isbn:0-395-36341-1"; +print "ok 2\n"; + +print "not " unless $u->isbn eq "0-395-36341-1"; +print "ok 3\n"; + +print "not " unless $u->isbn_group_code == 0; +print "ok 4\n"; + +print "not " unless $u->isbn_publisher_code == 395; +print "ok 5\n"; + +print "not " unless $u->isbn13 eq "9780395363416"; +print "ok 6\n"; + +print "not " unless $u->nss eq "0395363411"; +print "ok 7\n"; + +print "not " unless $u->isbn("0-88730-866-x") eq "0-395-36341-1"; +print "ok 8\n"; + +print "not " unless $u->nss eq "0-88730-866-x"; +print "ok 9\n"; + +print "not " unless $u->isbn eq "0-88730-866-X"; +print "ok 10\n"; + +print "not " unless URI::eq("urn:isbn:088730866x", "URN:ISBN:0-88-73-08-66-X"); +print "ok 11\n"; + +# try to illegal ones +$u = URI->new("urn:ISBN:abc"); +print "not " unless $u eq "urn:ISBN:abc"; +print "ok 12\n"; + +print "not " if $u->nss ne "abc" || defined $u->isbn; +print "ok 13\n"; + + + diff --git a/t/urn-oid.t b/t/urn-oid.t new file mode 100644 index 0000000..d35e524 --- /dev/null +++ b/t/urn-oid.t @@ -0,0 +1,24 @@ +use strict; +use warnings; + +print "1..4\n"; + +use URI; + +my $u = URI->new("urn:oid"); + +$u->oid(1..10); + +#print "$u\n"; + +print "not " unless $u eq "urn:oid:1.2.3.4.5.6.7.8.9.10"; +print "ok 1\n"; + +print "not " unless $u->oid eq "1.2.3.4.5.6.7.8.9.10"; +print "ok 2\n"; + +print "not " unless $u->scheme eq "urn" && $u->nid eq "oid"; +print "ok 3\n"; + +print "not " unless $u->oid eq $u->nss; +print "ok 4\n"; diff --git a/t/utf8.t b/t/utf8.t new file mode 100644 index 0000000..1453cfc --- /dev/null +++ b/t/utf8.t @@ -0,0 +1,20 @@ +use strict; +use warnings; + +use utf8; + +use Test::More 'no_plan'; +use URI; + +is(URI->new('http://foobar/mooi€e')->as_string, 'http://foobar/mooi%E2%82%ACe'); + +my $uri = URI->new('http:'); +$uri->query_form("mooi€e" => "mooi€e"); +is( $uri->query, "mooi%E2%82%ACe=mooi%E2%82%ACe" ); +is( ($uri->query_form)[1], "mooi\xE2\x82\xACe" ); + +# RT#70161 +use Encode; +$uri = URI->new(decode_utf8 '?Query=%C3%A4%C3%B6%C3%BC'); +is( ($uri->query_form)[1], "\xC3\xA4\xC3\xB6\xC3\xBC"); +is( decode_utf8(($uri->query_form)[1]), 'äöü'); diff --git a/uri-test b/uri-test new file mode 100755 index 0000000..ca30ef8 --- /dev/null +++ b/uri-test @@ -0,0 +1,58 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; + +sub usage { + my $prog = $0; $prog =~ s,.*/,,; + die "Usage: $prog [ []...]\n"; +} + +usage() unless @ARGV; +my $uri = shift; +my $orig = $uri; + +require URI; + +my @ctor_arg = ($uri); +push(@ctor_arg, shift) while @ARGV && $ARGV[0] =~ s/^\+//; + +$uri = URI->new(@ctor_arg); + +if (@ARGV) { + my $method = shift; + my $list_context = ($method =~ s/^\@//); + #print "URI->new(\"$uri\")->$method ==> "; + for (@ARGV) { + undef($_) if $_ eq "UNDEF"; + } + + my @result; + if ($list_context) { + @result = $uri->$method(@ARGV); + } else { + @result = scalar($uri->$method(@ARGV)); + } + + for (@result) { + if (defined) { + $_ = "«$_»" if /^\s*$/; + } else { + $_ = ""; + } + } + print join(" ", @result), "\n"; +} +print "$uri\n" unless $orig eq $uri; +exit; + +# Some extra methods that might be nice + +sub UNIVERSAL::class { ref($_[0]) } + +sub UNIVERSAL::dump { + require Data::Dumper; + my $d = Data::Dumper->Dump(\@_, ["self", "arg1", "arg2", "arg3", "arg4"]); + chomp($d); + $d; +} -- cgit v1.2.1