diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-07-25 01:06:42 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-07-25 01:06:42 +0000 |
commit | 9165b237ad8fae18b36d4d40d6e2ccfde7b136c7 (patch) | |
tree | 06530ddd6baa7e251c58b6b6729ed458da61a681 /lib/URI.pm | |
download | URI-tarball-9165b237ad8fae18b36d4d40d6e2ccfde7b136c7.tar.gz |
Diffstat (limited to 'lib/URI.pm')
-rw-r--r-- | lib/URI.pm | 1155 |
1 files changed, 1155 insertions, 0 deletions
diff --git a/lib/URI.pm b/lib/URI.pm new file mode 100644 index 0000000..98cd575 --- /dev/null +++ b/lib/URI.pm @@ -0,0 +1,1155 @@ +package URI; + +use strict; +use warnings; + +our $VERSION = "1.69"; + +our ($ABS_REMOTE_LEADING_DOTS, $ABS_ALLOW_RELATIVE_SCHEME, $DEFAULT_QUERY_FORM_DELIMITER); + +my %implements; # mapping from scheme to implementor class + +# Some "official" character classes + +our $reserved = q(;/?:@&=+$,[]); +our $mark = q(-_.!~*'()); #'; emacs +our $unreserved = "A-Za-z0-9\Q$mark\E"; +our $uric = quotemeta($reserved) . $unreserved . "%"; + +our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*'; + +use Carp (); +use URI::Escape (); + +use overload ('""' => sub { ${$_[0]} }, + '==' => sub { _obj_eq(@_) }, + '!=' => sub { !_obj_eq(@_) }, + fallback => 1, + ); + +# Check if two objects are the same object +sub _obj_eq { + return overload::StrVal($_[0]) eq overload::StrVal($_[1]); +} + +sub new +{ + my($class, $uri, $scheme) = @_; + + $uri = defined ($uri) ? "$uri" : ""; # stringify + # Get rid of potential wrapping + $uri =~ s/^<(?:URL:)?(.*)>$/$1/; # + $uri =~ s/^"(.*)"$/$1/; + $uri =~ s/^\s+//; + $uri =~ s/\s+$//; + + my $impclass; + if ($uri =~ m/^($scheme_re):/so) { + $scheme = $1; + } + else { + if (($impclass = ref($scheme))) { + $scheme = $scheme->scheme; + } + elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) { + $scheme = $1; + } + } + $impclass ||= implementor($scheme) || + do { + require URI::_foreign; + $impclass = 'URI::_foreign'; + }; + + return $impclass->_init($uri, $scheme); +} + + +sub new_abs +{ + my($class, $uri, $base) = @_; + $uri = $class->new($uri, $base); + $uri->abs($base); +} + + +sub _init +{ + my $class = shift; + my($str, $scheme) = @_; + # find all funny characters and encode the bytes. + $str = $class->_uric_escape($str); + $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o || + $class->_no_scheme_ok; + my $self = bless \$str, $class; + $self; +} + + +sub _uric_escape +{ + my($class, $str) = @_; + $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego; + utf8::downgrade($str); + return $str; +} + + +sub implementor +{ + my($scheme, $impclass) = @_; + if (!$scheme || $scheme !~ /\A$scheme_re\z/o) { + require URI::_generic; + return "URI::_generic"; + } + + $scheme = lc($scheme); + + if ($impclass) { + # Set the implementor class for a given scheme + my $old = $implements{$scheme}; + $impclass->_init_implementor($scheme); + $implements{$scheme} = $impclass; + return $old; + } + + my $ic = $implements{$scheme}; + return $ic if $ic; + + # scheme not yet known, look for internal or + # preloaded (with 'use') implementation + $ic = "URI::$scheme"; # default location + + # turn scheme into a valid perl identifier by a simple transformation... + $ic =~ s/\+/_P/g; + $ic =~ s/\./_O/g; + $ic =~ s/\-/_/g; + + no strict 'refs'; + # check we actually have one for the scheme: + unless (@{"${ic}::ISA"}) { + # Try to load it + eval "require $ic"; + die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/; + return undef unless @{"${ic}::ISA"}; + } + + $ic->_init_implementor($scheme); + $implements{$scheme} = $ic; + $ic; +} + + +sub _init_implementor +{ + my($class, $scheme) = @_; + # Remember that one implementor class may actually + # serve to implement several URI schemes. +} + + +sub clone +{ + my $self = shift; + my $other = $$self; + bless \$other, ref $self; +} + +sub TO_JSON { ${$_[0]} } + +sub _no_scheme_ok { 0 } + +sub _scheme +{ + my $self = shift; + + unless (@_) { + return undef unless $$self =~ /^($scheme_re):/o; + return $1; + } + + my $old; + my $new = shift; + if (defined($new) && length($new)) { + Carp::croak("Bad scheme '$new'") unless $new =~ /^$scheme_re$/o; + $old = $1 if $$self =~ s/^($scheme_re)://o; + my $newself = URI->new("$new:$$self"); + $$self = $$newself; + bless $self, ref($newself); + } + else { + if ($self->_no_scheme_ok) { + $old = $1 if $$self =~ s/^($scheme_re)://o; + Carp::carp("Oops, opaque part now look like scheme") + if $^W && $$self =~ m/^$scheme_re:/o + } + else { + $old = $1 if $$self =~ m/^($scheme_re):/o; + } + } + + return $old; +} + +sub scheme +{ + my $scheme = shift->_scheme(@_); + return undef unless defined $scheme; + lc($scheme); +} + +sub has_recognized_scheme { + my $self = shift; + return ref($self) !~ /^URI::_(?:foreign|generic)\z/; +} + +sub opaque +{ + my $self = shift; + + unless (@_) { + $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die; + return $1; + } + + $$self =~ /^($scheme_re:)? # optional scheme + ([^\#]*) # opaque + (\#.*)? # optional fragment + $/sx or die; + + my $old_scheme = $1; + my $old_opaque = $2; + my $old_frag = $3; + + my $new_opaque = shift; + $new_opaque = "" unless defined $new_opaque; + $new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego; + utf8::downgrade($new_opaque); + + $$self = defined($old_scheme) ? $old_scheme : ""; + $$self .= $new_opaque; + $$self .= $old_frag if defined $old_frag; + + $old_opaque; +} + +sub path { goto &opaque } # alias + + +sub fragment +{ + my $self = shift; + unless (@_) { + return undef unless $$self =~ /\#(.*)/s; + return $1; + } + + my $old; + $old = $1 if $$self =~ s/\#(.*)//s; + + my $new_frag = shift; + if (defined $new_frag) { + $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego; + utf8::downgrade($new_frag); + $$self .= "#$new_frag"; + } + $old; +} + + +sub as_string +{ + my $self = shift; + $$self; +} + + +sub as_iri +{ + my $self = shift; + my $str = $$self; + if ($str =~ s/%([89a-fA-F][0-9a-fA-F])/chr(hex($1))/eg) { + # All this crap because the more obvious: + # + # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift }) + # + # doesn't work before Encode 2.39. Wait for a standard release + # to bundle that version. + + require Encode; + my $enc = Encode::find_encoding("UTF-8"); + my $u = ""; + while (length $str) { + $u .= $enc->decode($str, Encode::FB_QUIET()); + if (length $str) { + # escape next char + $u .= URI::Escape::escape_char(substr($str, 0, 1, "")); + } + } + $str = $u; + } + return $str; +} + + +sub canonical +{ + # Make sure scheme is lowercased, that we don't escape unreserved chars, + # and that we use upcase escape sequences. + + my $self = shift; + my $scheme = $self->_scheme || ""; + my $uc_scheme = $scheme =~ /[A-Z]/; + my $esc = $$self =~ /%[a-fA-F0-9]{2}/; + return $self unless $uc_scheme || $esc; + + my $other = $self->clone; + if ($uc_scheme) { + $other->_scheme(lc $scheme); + } + if ($esc) { + $$other =~ s{%([0-9a-fA-F]{2})} + { my $a = chr(hex($1)); + $a =~ /^[$unreserved]\z/o ? $a : "%\U$1" + }ge; + } + return $other; +} + +# Compare two URIs, subclasses will provide a more correct implementation +sub eq { + my($self, $other) = @_; + $self = URI->new($self, $other) unless ref $self; + $other = URI->new($other, $self) unless ref $other; + ref($self) eq ref($other) && # same class + $self->canonical->as_string eq $other->canonical->as_string; +} + +# generic-URI transformation methods +sub abs { $_[0]; } +sub rel { $_[0]; } + +sub secure { 0 } + +# help out Storable +sub STORABLE_freeze { + my($self, $cloning) = @_; + return $$self; +} + +sub STORABLE_thaw { + my($self, $cloning, $str) = @_; + $$self = $str; +} + +1; + +__END__ + +=head1 NAME + +URI - Uniform Resource Identifiers (absolute and relative) + +=head1 SYNOPSIS + + $u1 = URI->new("http://www.perl.com"); + $u2 = URI->new("foo", "http"); + $u3 = $u2->abs($u1); + $u4 = $u3->clone; + $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical; + + $str = $u->as_string; + $str = "$u"; + + $scheme = $u->scheme; + $opaque = $u->opaque; + $path = $u->path; + $frag = $u->fragment; + + $u->scheme("ftp"); + $u->host("ftp.perl.com"); + $u->path("cpan/"); + +=head1 DESCRIPTION + +This module implements the C<URI> class. Objects of this class +represent "Uniform Resource Identifier references" as specified in RFC +2396 (and updated by RFC 2732). + +A Uniform Resource Identifier is a compact string of characters that +identifies an abstract or physical resource. A Uniform Resource +Identifier can be further classified as either a Uniform Resource Locator +(URL) or a Uniform Resource Name (URN). The distinction between URL +and URN does not matter to the C<URI> class interface. A +"URI-reference" is a URI that may have additional information attached +in the form of a fragment identifier. + +An absolute URI reference consists of three parts: a I<scheme>, a +I<scheme-specific part> and a I<fragment> identifier. A subset of URI +references share a common syntax for hierarchical namespaces. For +these, the scheme-specific part is further broken down into +I<authority>, I<path> and I<query> components. These URIs can also +take the form of relative URI references, where the scheme (and +usually also the authority) component is missing, but implied by the +context of the URI reference. The three forms of URI reference +syntax are summarized as follows: + + <scheme>:<scheme-specific-part>#<fragment> + <scheme>://<authority><path>?<query>#<fragment> + <path>?<query>#<fragment> + +The components into which a URI reference can be divided depend on the +I<scheme>. The C<URI> class provides methods to get and set the +individual components. The methods available for a specific +C<URI> object depend on the scheme. + +=head1 CONSTRUCTORS + +The following methods construct new C<URI> objects: + +=over 4 + +=item $uri = URI->new( $str ) + +=item $uri = URI->new( $str, $scheme ) + +Constructs a new URI object. The string +representation of a URI is given as argument, together with an optional +scheme specification. Common URI wrappers like "" and <>, as well as +leading and trailing white space, are automatically removed from +the $str argument before it is processed further. + +The constructor determines the scheme, maps this to an appropriate +URI subclass, constructs a new object of that class and returns it. + +If the scheme isn't one of those that URI recognizes, you still get +an URI object back that you can access the generic methods on. The +C<< $uri->has_recognized_scheme >> method can be used to test for +this. + +The $scheme argument is only used when $str is a +relative URI. It can be either a simple string that +denotes the scheme, a string containing an absolute URI reference, or +an absolute C<URI> object. If no $scheme is specified for a relative +URI $str, then $str is simply treated as a generic URI (no scheme-specific +methods available). + +The set of characters available for building URI references is +restricted (see L<URI::Escape>). Characters outside this set are +automatically escaped by the URI constructor. + +=item $uri = URI->new_abs( $str, $base_uri ) + +Constructs a new absolute URI object. The $str argument can +denote a relative or absolute URI. If relative, then it is +absolutized using $base_uri as base. The $base_uri must be an absolute +URI. + +=item $uri = URI::file->new( $filename ) + +=item $uri = URI::file->new( $filename, $os ) + +Constructs a new I<file> URI from a file name. See L<URI::file>. + +=item $uri = URI::file->new_abs( $filename ) + +=item $uri = URI::file->new_abs( $filename, $os ) + +Constructs a new absolute I<file> URI from a file name. See +L<URI::file>. + +=item $uri = URI::file->cwd + +Returns the current working directory as a I<file> URI. See +L<URI::file>. + +=item $uri->clone + +Returns a copy of the $uri. + +=back + +=head1 COMMON METHODS + +The methods described in this section are available for all C<URI> +objects. + +Methods that give access to components of a URI always return the +old value of the component. The value returned is C<undef> if the +component was not present. There is generally a difference between a +component that is empty (represented as C<"">) and a component that is +missing (represented as C<undef>). If an accessor method is given an +argument, it updates the corresponding component in addition to +returning the old value of the component. Passing an undefined +argument removes the component (if possible). The description of +each accessor method indicates whether the component is passed as +an escaped (percent-encoded) or an unescaped string. A component that can be further +divided into sub-parts are usually passed escaped, as unescaping might +change its semantics. + +The common methods available for all URI are: + +=over 4 + +=item $uri->scheme + +=item $uri->scheme( $new_scheme ) + +Sets and returns the scheme part of the $uri. If the $uri is +relative, then $uri->scheme returns C<undef>. If called with an +argument, it updates the scheme of $uri, possibly changing the +class of $uri, and returns the old scheme value. The method croaks +if the new scheme name is illegal; a scheme name must begin with a +letter and must consist of only US-ASCII letters, numbers, and a few +special marks: ".", "+", "-". This restriction effectively means +that the scheme must be passed unescaped. Passing an undefined +argument to the scheme method makes the URI relative (if possible). + +Letter case does not matter for scheme names. The string +returned by $uri->scheme is always lowercase. If you want the scheme +just as it was written in the URI in its original case, +you can use the $uri->_scheme method instead. + +=item $uri->has_recognized_scheme + +Returns TRUE if the URI scheme is one that URI recognizes. + +It will also be TRUE for relative URLs where a recognized +scheme was provided to the constructor, even if C<< $uri->scheme >> +returns C<undef> for these. + +=item $uri->opaque + +=item $uri->opaque( $new_opaque ) + +Sets and returns the scheme-specific part of the $uri +(everything between the scheme and the fragment) +as an escaped string. + +=item $uri->path + +=item $uri->path( $new_path ) + +Sets and returns the same value as $uri->opaque unless the URI +supports the generic syntax for hierarchical namespaces. +In that case the generic method is overridden to set and return +the part of the URI between the I<host name> and the I<fragment>. + +=item $uri->fragment + +=item $uri->fragment( $new_frag ) + +Returns the fragment identifier of a URI reference +as an escaped string. + +=item $uri->as_string + +Returns a URI object to a plain ASCII string. URI objects are +also converted to plain strings automatically by overloading. This +means that $uri objects can be used as plain strings in most Perl +constructs. + +=item $uri->as_iri + +Returns a Unicode string representing the URI. Escaped UTF-8 sequences +representing non-ASCII characters are turned into their corresponding Unicode +code point. + +=item $uri->canonical + +Returns a normalized version of the URI. The rules +for normalization are scheme-dependent. They usually involve +lowercasing the scheme and Internet host name components, +removing the explicit port specification if it matches the default port, +uppercasing all escape sequences, and unescaping octets that can be +better represented as plain characters. + +For efficiency reasons, if the $uri is already in normalized form, +then a reference to it is returned instead of a copy. + +=item $uri->eq( $other_uri ) + +=item URI::eq( $first_uri, $other_uri ) + +Tests whether two URI references are equal. URI references +that normalize to the same string are considered equal. The method +can also be used as a plain function which can also test two string +arguments. + +If you need to test whether two C<URI> object references denote the +same object, use the '==' operator. + +=item $uri->abs( $base_uri ) + +Returns an absolute URI reference. If $uri is already +absolute, then a reference to it is simply returned. If the $uri +is relative, then a new absolute URI is constructed by combining the +$uri and the $base_uri, and returned. + +=item $uri->rel( $base_uri ) + +Returns a relative URI reference if it is possible to +make one that denotes the same resource relative to $base_uri. +If not, then $uri is simply returned. + +=item $uri->secure + +Returns a TRUE value if the URI is considered to point to a resource on +a secure channel, such as an SSL or TLS encrypted one. + +=back + +=head1 GENERIC METHODS + +The following methods are available to schemes that use the +common/generic syntax for hierarchical namespaces. The descriptions of +schemes below indicate which these are. Unrecognized schemes are +assumed to support the generic syntax, and therefore the following +methods: + +=over 4 + +=item $uri->authority + +=item $uri->authority( $new_authority ) + +Sets and returns the escaped authority component +of the $uri. + +=item $uri->path + +=item $uri->path( $new_path ) + +Sets and returns the escaped path component of +the $uri (the part between the host name and the query or fragment). +The path can never be undefined, but it can be the empty string. + +=item $uri->path_query + +=item $uri->path_query( $new_path_query ) + +Sets and returns the escaped path and query +components as a single entity. The path and the query are +separated by a "?" character, but the query can itself contain "?". + +=item $uri->path_segments + +=item $uri->path_segments( $segment, ... ) + +Sets and returns the path. In a scalar context, it returns +the same value as $uri->path. In a list context, it returns the +unescaped path segments that make up the path. Path segments that +have parameters are returned as an anonymous array. The first element +is the unescaped path segment proper; subsequent elements are escaped +parameter strings. Such an anonymous array uses overloading so it can +be treated as a string too, but this string does not include the +parameters. + +Note that absolute paths have the empty string as their first +I<path_segment>, i.e. the I<path> C</foo/bar> have 3 +I<path_segments>; "", "foo" and "bar". + +=item $uri->query + +=item $uri->query( $new_query ) + +Sets and returns the escaped query component of +the $uri. + +=item $uri->query_form + +=item $uri->query_form( $key1 => $val1, $key2 => $val2, ... ) + +=item $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim ) + +=item $uri->query_form( \@key_value_pairs ) + +=item $uri->query_form( \@key_value_pairs, $delim ) + +=item $uri->query_form( \%hash ) + +=item $uri->query_form( \%hash, $delim ) + +Sets and returns query components that use the +I<application/x-www-form-urlencoded> format. Key/value pairs are +separated by "&", and the key is separated from the value by a "=" +character. + +The form can be set either by passing separate key/value pairs, or via +an array or hash reference. Passing an empty array or an empty hash +removes the query component, whereas passing no arguments at all leaves +the component unchanged. The order of keys is undefined if a hash +reference is passed. The old value is always returned as a list of +separate key/value pairs. Assigning this list to a hash is unwise as +the keys returned might repeat. + +The values passed when setting the form can be plain strings or +references to arrays of strings. Passing an array of values has the +same effect as passing the key repeatedly with one value at a time. +All the following statements have the same effect: + + $uri->query_form(foo => 1, foo => 2); + $uri->query_form(foo => [1, 2]); + $uri->query_form([ foo => 1, foo => 2 ]); + $uri->query_form([ foo => [1, 2] ]); + $uri->query_form({ foo => [1, 2] }); + +The $delim parameter can be passed as ";" to force the key/value pairs +to be delimited by ";" instead of "&" in the query string. This +practice is often recommended for URLs embedded in HTML or XML +documents as this avoids the trouble of escaping the "&" character. +You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable to +";" for the same global effect. + +The C<URI::QueryParam> module can be loaded to add further methods to +manipulate the form of a URI. See L<URI::QueryParam> for details. + +=item $uri->query_keywords + +=item $uri->query_keywords( $keywords, ... ) + +=item $uri->query_keywords( \@keywords ) + +Sets and returns query components that use the +keywords separated by "+" format. + +The keywords can be set either by passing separate keywords directly +or by passing a reference to an array of keywords. Passing an empty +array removes the query component, whereas passing no arguments at +all leaves the component unchanged. The old value is always returned +as a list of separate words. + +=back + +=head1 SERVER METHODS + +For schemes where the I<authority> component denotes an Internet host, +the following methods are available in addition to the generic +methods. + +=over 4 + +=item $uri->userinfo + +=item $uri->userinfo( $new_userinfo ) + +Sets and returns the escaped userinfo part of the +authority component. + +For some schemes this is a user name and a password separated by +a colon. This practice is not recommended. Embedding passwords in +clear text (such as URI) has proven to be a security risk in almost +every case where it has been used. + +=item $uri->host + +=item $uri->host( $new_host ) + +Sets and returns the unescaped hostname. + +If the $new_host string ends with a colon and a number, then this +number also sets the port. + +For IPv6 addresses the brackets around the raw address is removed in the return +value from $uri->host. When setting the host attribute to an IPv6 address you +can use a raw address or one enclosed in brackets. The address needs to be +enclosed in brackets if you want to pass in a new port value as well. + +=item $uri->ihost + +Returns the host in Unicode form. Any IDNA A-labels are turned into U-labels. + +=item $uri->port + +=item $uri->port( $new_port ) + +Sets and returns the port. The port is a simple integer +that should be greater than 0. + +If a port is not specified explicitly in the URI, then the URI scheme's default port +is returned. If you don't want the default port +substituted, then you can use the $uri->_port method instead. + +=item $uri->host_port + +=item $uri->host_port( $new_host_port ) + +Sets and returns the host and port as a single +unit. The returned value includes a port, even if it matches the +default port. The host part and the port part are separated by a +colon: ":". + +For IPv6 addresses the bracketing is preserved; thus +URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast this with +$uri->host which will remove the brackets. + +=item $uri->default_port + +Returns the default port of the URI scheme to which $uri +belongs. For I<http> this is the number 80, for I<ftp> this +is the number 21, etc. The default port for a scheme can not be +changed. + +=back + +=head1 SCHEME-SPECIFIC SUPPORT + +Scheme-specific support is provided for the following URI schemes. For C<URI> +objects that do not belong to one of these, you can only use the common and +generic methods. + +=over 4 + +=item B<data>: + +The I<data> URI scheme is specified in RFC 2397. It allows inclusion +of small data items as "immediate" data, as if it had been included +externally. + +C<URI> objects belonging to the data scheme support the common methods +and two new methods to access their scheme-specific components: +$uri->media_type and $uri->data. See L<URI::data> for details. + +=item B<file>: + +An old specification of the I<file> URI scheme is found in RFC 1738. +A new RFC 2396 based specification in not available yet, but file URI +references are in common use. + +C<URI> objects belonging to the file scheme support the common and +generic methods. In addition, they provide two methods for mapping file URIs +back to local file names; $uri->file and $uri->dir. See L<URI::file> +for details. + +=item B<ftp>: + +An old specification of the I<ftp> URI scheme is found in RFC 1738. A +new RFC 2396 based specification in not available yet, but ftp URI +references are in common use. + +C<URI> objects belonging to the ftp scheme support the common, +generic and server methods. In addition, they provide two methods for +accessing the userinfo sub-components: $uri->user and $uri->password. + +=item B<gopher>: + +The I<gopher> URI scheme is specified in +<draft-murali-url-gopher-1996-12-04> and will hopefully be available +as a RFC 2396 based specification. + +C<URI> objects belonging to the gopher scheme support the common, +generic and server methods. In addition, they support some methods for +accessing gopher-specific path components: $uri->gopher_type, +$uri->selector, $uri->search, $uri->string. + +=item B<http>: + +The I<http> URI scheme is specified in RFC 2616. +The scheme is used to reference resources hosted by HTTP servers. + +C<URI> objects belonging to the http scheme support the common, +generic and server methods. + +=item B<https>: + +The I<https> URI scheme is a Netscape invention which is commonly +implemented. The scheme is used to reference HTTP servers through SSL +connections. Its syntax is the same as http, but the default +port is different. + +=item B<ldap>: + +The I<ldap> URI scheme is specified in RFC 2255. LDAP is the +Lightweight Directory Access Protocol. An ldap URI describes an LDAP +search operation to perform to retrieve information from an LDAP +directory. + +C<URI> objects belonging to the ldap scheme support the common, +generic and server methods as well as ldap-specific methods: $uri->dn, +$uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See +L<URI::ldap> for details. + +=item B<ldapi>: + +Like the I<ldap> URI scheme, but uses a UNIX domain socket. The +server methods are not supported, and the local socket path is +available as $uri->un_path. The I<ldapi> scheme is used by the +OpenLDAP package. There is no real specification for it, but it is +mentioned in various OpenLDAP manual pages. + +=item B<ldaps>: + +Like the I<ldap> URI scheme, but uses an SSL connection. This +scheme is deprecated, as the preferred way is to use the I<start_tls> +mechanism. + +=item B<mailto>: + +The I<mailto> URI scheme is specified in RFC 2368. The scheme was +originally used to designate the Internet mailing address of an +individual or service. It has (in RFC 2368) been extended to allow +setting of other mail header fields and the message body. + +C<URI> objects belonging to the mailto scheme support the common +methods and the generic query methods. In addition, they support the +following mailto-specific methods: $uri->to, $uri->headers. + +Note that the "foo@example.com" part of a mailto is I<not> the +C<userinfo> and C<host> but instead the C<path>. This allows a +mailto URI to contain multiple comma separated email addresses. + +=item B<mms>: + +The I<mms> URL specification can be found at L<http://sdp.ppona.com/>. +C<URI> objects belonging to the mms scheme support the common, +generic, and server methods, with the exception of userinfo and +query-related sub-components. + +=item B<news>: + +The I<news>, I<nntp> and I<snews> URI schemes are specified in +<draft-gilman-news-url-01> and will hopefully be available as an RFC +2396 based specification soon. + +C<URI> objects belonging to the news scheme support the common, +generic and server methods. In addition, they provide some methods to +access the path: $uri->group and $uri->message. + +=item B<nntp>: + +See I<news> scheme. + +=item B<pop>: + +The I<pop> URI scheme is specified in RFC 2384. The scheme is used to +reference a POP3 mailbox. + +C<URI> objects belonging to the pop scheme support the common, generic +and server methods. In addition, they provide two methods to access the +userinfo components: $uri->user and $uri->auth + +=item B<rlogin>: + +An old specification of the I<rlogin> URI scheme is found in RFC +1738. C<URI> objects belonging to the rlogin scheme support the +common, generic and server methods. + +=item B<rtsp>: + +The I<rtsp> URL specification can be found in section 3.2 of RFC 2326. +C<URI> objects belonging to the rtsp scheme support the common, +generic, and server methods, with the exception of userinfo and +query-related sub-components. + +=item B<rtspu>: + +The I<rtspu> URI scheme is used to talk to RTSP servers over UDP +instead of TCP. The syntax is the same as rtsp. + +=item B<rsync>: + +Information about rsync is available from L<http://rsync.samba.org/>. +C<URI> objects belonging to the rsync scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B<sip>: + +The I<sip> URI specification is described in sections 19.1 and 25 +of RFC 3261. C<URI> objects belonging to the sip scheme support the +common, generic, and server methods with the exception of path related +sub-components. In addition, they provide two methods to get and set +I<sip> parameters: $uri->params_form and $uri->params. + +=item B<sips>: + +See I<sip> scheme. Its syntax is the same as sip, but the default +port is different. + +=item B<snews>: + +See I<news> scheme. Its syntax is the same as news, but the default +port is different. + +=item B<telnet>: + +An old specification of the I<telnet> URI scheme is found in RFC +1738. C<URI> objects belonging to the telnet scheme support the +common, generic and server methods. + +=item B<tn3270>: + +These URIs are used like I<telnet> URIs but for connections to IBM +mainframes. C<URI> objects belonging to the tn3270 scheme support the +common, generic and server methods. + +=item B<ssh>: + +Information about ssh is available at L<http://www.openssh.com/>. +C<URI> objects belonging to the ssh scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B<sftp>: + +C<URI> objects belonging to the sftp scheme support the common, +generic and server methods. In addition, they provide methods to +access the userinfo sub-components: $uri->user and $uri->password. + +=item B<urn>: + +The syntax of Uniform Resource Names is specified in RFC 2141. C<URI> +objects belonging to the urn scheme provide the common methods, and also the +methods $uri->nid and $uri->nss, which return the Namespace Identifier +and the Namespace-Specific String respectively. + +The Namespace Identifier basically works like the Scheme identifier of +URIs, and further divides the URN namespace. Namespace Identifier +assignments are maintained at +L<http://www.iana.org/assignments/urn-namespaces>. + +Letter case is not significant for the Namespace Identifier. It is +always returned in lower case by the $uri->nid method. The $uri->_nid +method can be used if you want it in its original case. + +=item B<urn>:B<isbn>: + +The C<urn:isbn:> namespace contains International Standard Book +Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging +to this namespace has the following extra methods (if the +Business::ISBN module is available): $uri->isbn, +$uri->isbn_publisher_code, $uri->isbn_group_code (formerly isbn_country_code, +which is still supported by issues a deprecation warning), $uri->isbn_as_ean. + +=item B<urn>:B<oid>: + +The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is +described in RFC 3061. An object identifier consists of sequences of digits +separated by dots. A C<URI> object belonging to this namespace has an +additional method called $uri->oid that can be used to get/set the oid +value. In a list context, oid numbers are returned as separate elements. + +=back + +=head1 CONFIGURATION VARIABLES + +The following configuration variables influence how the class and its +methods behave: + +=over 4 + +=item $URI::ABS_ALLOW_RELATIVE_SCHEME + +Some older parsers used to allow the scheme name to be present in the +relative URL if it was the same as the base URL scheme. RFC 2396 says +that this should be avoided, but you can enable this old behaviour by +setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value. +The difference is demonstrated by the following examples: + + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:foo" + + local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; + URI->new("http:foo")->abs("http://host/a/b") + ==> "http:/host/a/foo" + + +=item $URI::ABS_REMOTE_LEADING_DOTS + +You can also have the abs() method ignore excess ".." +segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS +to a TRUE value. The difference is demonstrated by the following +examples: + + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/../../foo" + + local $URI::ABS_REMOTE_LEADING_DOTS = 1; + URI->new("../../../foo")->abs("http://host/a/b") + ==> "http://host/foo" + +=item $URI::DEFAULT_QUERY_FORM_DELIMITER + +This value can be set to ";" to have the query form C<key=value> pairs +delimited by ";" instead of "&" which is the default. + +=back + +=head1 BUGS + +There are some things that are not quite right: + +=over + +=item * + +Using regexp variables like $1 directly as arguments to the URI accessor methods +does not work too well with current perl implementations. I would argue +that this is actually a bug in perl. The workaround is to quote +them. Example: + + /(...)/ || die; + $u->query("$1"); + + +=item * + +The escaping (percent encoding) of chars in the 128 .. 255 range passed to the +URI constructor or when setting URI parts using the accessor methods depend on +the state of the internal UTF8 flag (see utf8::is_utf8) of the string passed. +If the UTF8 flag is set the UTF-8 encoded version of the character is percent +encoded. If the UTF8 flag isn't set the Latin-1 version (byte) of the +character is percent encoded. This basically exposes the internal encoding of +Perl strings. + +=back + +=head1 PARSING URIs WITH REGEXP + +As an alternative to this module, the following (official) regular +expression can be used to decode a URI: + + my($scheme, $authority, $path, $query, $fragment) = + $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + +The C<URI::Split> module provides the function uri_split() as a +readable alternative. + +=head1 SEE ALSO + +L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>, +L<URI::Split>, L<URI::Heuristic> + +RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax", +Berners-Lee, Fielding, Masinter, August 1998. + +L<http://www.iana.org/assignments/uri-schemes> + +L<http://www.iana.org/assignments/urn-namespaces> + +L<http://www.w3.org/Addressing/> + +=head1 COPYRIGHT + +Copyright 1995-2009 Gisle Aas. + +Copyright 1995 Martijn Koster. + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 AUTHORS / ACKNOWLEDGMENTS + +This module is based on the C<URI::URL> module, which in turn was +(distantly) based on the C<wwwurl.pl> code in the libwww-perl for +perl4 developed by Roy Fielding, as part of the Arcadia project at the +University of California, Irvine, with contributions from Brooks +Cutter. + +C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and +Martijn Koster with input from other people on the libwww-perl mailing +list. + +C<URI> and related subclasses was developed by Gisle Aas. + +=cut |