diff options
author | Gisle Aas <aas@oslonett.no> | 1997-10-16 11:41:17 +0000 |
---|---|---|
committer | Gisle Aas <aas@oslonett.no> | 1997-10-16 11:41:17 +0000 |
commit | 813281d3d9140f0912dfdfd970ab5e73eedd35b8 (patch) | |
tree | 6ab399fcf15db38a174d2ce252eaef13fee02427 | |
parent | f5c86855d5ffdb567b8687b1b8fd8f5ace6bbfc8 (diff) | |
download | uri-813281d3d9140f0912dfdfd970ab5e73eedd35b8.tar.gz |
Function name change: uf_urlstr(), uf_url()
Support $ENV{URL_GUESS_PATTERN}
-rw-r--r-- | URI/Heuristic.pm | 109 |
1 files changed, 75 insertions, 34 deletions
diff --git a/URI/Heuristic.pm b/URI/Heuristic.pm index 87dc429..4fa4284 100644 --- a/URI/Heuristic.pm +++ b/URI/Heuristic.pm @@ -1,39 +1,71 @@ package URI::Heuristic; -# $Id: Heuristic.pm,v 4.4 1997/10/14 08:35:39 aas Exp $ +# $Id: Heuristic.pm,v 4.5 1997/10/16 11:41:17 aas Exp $ =head1 NAME -friendly_url - Expand URL using heuristics +uf_urlstr - Expand URL using heuristics =head1 SYNOPSIS - use URI::Heuristic qw(friendly_url); - $url = friendly_url("perl"); # http://www.perl.com - $url = friendly_url("www.sol.no/sol"); # http://www.sol.no/no - $url = friendly_url("aas"); # http://www.aas.no - $url = friendly_url("ftp.funet.fi"); # ftp://ftp.funet.fi - $url = friendly_url("/etc/passwd"); # file:/etc/passwd + use URI::Heuristic qw(uf_urlstr); + $url = uf_urlstr("perl"); # http://www.perl.com + $url = uf_urlstr("www.sol.no/sol"); # http://www.sol.no/no + $url = uf_urlstr("aas"); # http://www.aas.no + $url = uf_urlstr("ftp.funet.fi"); # ftp://ftp.funet.fi + $url = uf_urlstr("/etc/passwd"); # file:/etc/passwd =head1 DESCRIPTION This module provide functions that expand strings into real URLs using -some heuristics. The following functions are provided: +some (random) heuristics. Already expanded URLs are not modified and +are returned unchanged. + +The following functions are provided: =over 4 -=item friendly_url($str) +=item uf_urlstr($str) -The friendly_url() function will try to make the string passed as -argument into a proper absolute URL string. +The uf_urlstr() function will try to make the string passed as +argument into a proper absolute URL string. The "uf_" prefix stands +for "User Friendly". -=item url($str) +=item uf_url($str) -This functions work the same way as friendly_url() but it will +This functions work the same way as uf_urlstr() but it will return a C<URI::URL> object. =back +=head1 ENVIRONMENT + +If the hostname portion of a URL does not contain any dots, then +centain qualified guesses will be made. These guesses are governed be +the following two environment variables. + +=over 10 + +=item COUNTRY + +This is the two letter country code (ISO 3166) for your location. If +the domain name of your host ends with two letters, then it is taken +to be the default country. See also L<Locale::Country>. + +=item URL_GUESS_PATTERN + +Contain a space separated list of URL patterns to try. The string +"ACME" is used as a placeholder for the host name in the URL provided. +Example: + + URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com" + export URL_GUESS_PATTERN + +Specifying URL_GUESS_PATTERN disables any guessing rules based on +country. An empty URL_GUESS_PATTERN disables any guessing that +involves host name lookups. + +=back =head1 COPYRIGHT @@ -46,42 +78,43 @@ modify it under the same terms as Perl itself. use strict; -use vars qw(@EXPORT_OK %LOCAL_GUESSING $DEBUG); +use vars qw(@EXPORT_OK $MY_COUNTRY %LOCAL_GUESSING $DEBUG); require Exporter; *import = \&Exporter::import; -@EXPORT_OK = qw(url friendly_url); +@EXPORT_OK = qw(uf_url uf_urlstr); -my $my_country; eval { require Net::Domain; my $fqdn = Net::Domain::hostfqdn(); - $my_country = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/; + $MY_COUNTRY = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/; # Some other heuristics to guess country? Perhaps looking # at some environment variable (LANG, LC_ALL, ???) - $my_country = $ENV{COUNTRY} if exists $ENV{COUNTRY}; + $MY_COUNTRY = $ENV{COUNTRY} if exists $ENV{COUNTRY}; }; %LOCAL_GUESSING = ( 'us' => [qw(www.ACME.gov www.ACME.mil)], - 'uk' => [qw(www.ACME.co.uk www.ACME.ac.uk)], + 'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)], 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)], 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)], + # send corrections and new entries to <aas@sn.no> ); -sub url ($) # h_url(), url2(), uf_url() +sub uf_url ($) { require URI::URL; - URI::URL->new(friendly_url($_[0])); + URI::URL->new(uf_urlstr($_[0])); } -sub friendly_url ($) # expand_url(), uf_urlstr() +sub uf_urlstr ($) { local($_) = @_; + print STDERR "uf_urlstr: resolving $_\n" if $DEBUG; return unless defined; s/^\s+//; @@ -105,27 +138,35 @@ sub friendly_url ($) # expand_url(), uf_urlstr() if ($host !~ /\./ && $host ne "localhost") { my @guess; - - if ($my_country) { - my $special = $LOCAL_GUESSING{$my_country}; - if ($special) { - my @special = @$special; - push(@guess, map { s/\bACME\b/$host/; $_ } @special); - } else { - push(@guess, "www.$host.$my_country"); + if (exists $ENV{URL_GUESS_PATTERN}) { + @guess = map { s/\bACME\b/$host/; $_ } + split(' ', $ENV{URL_GUESS_PATTERN}); + } else { + if ($MY_COUNTRY) { + my $special = $LOCAL_GUESSING{$MY_COUNTRY}; + if ($special) { + my @special = @$special; + push(@guess, map { s/\bACME\b/$host/; $_ } + @special); + } else { + push(@guess, "www.$host.$MY_COUNTRY"); + } } + push(@guess, map "www.$host.$_", + "com", "org", "net", "edu", "int"); } - push(@guess, map "www.$host.$_", - "com", "org", "net", "edu", "int"); my $guess; for $guess (@guess) { - print STDERR "Looking up '$guess'\n" if $DEBUG; + print STDERR "uf_urlstr: gethostbyname('$guess')..." + if $DEBUG; if (gethostbyname($guess)) { + print STDERR "yes\n" if $DEBUG; $host = $guess; last; } + print STDERR "no\n" if $DEBUG; } } $_ = "http://$host$_"; |