summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGisle Aas <aas@oslonett.no>1997-10-16 11:41:17 +0000
committerGisle Aas <aas@oslonett.no>1997-10-16 11:41:17 +0000
commit813281d3d9140f0912dfdfd970ab5e73eedd35b8 (patch)
tree6ab399fcf15db38a174d2ce252eaef13fee02427
parentf5c86855d5ffdb567b8687b1b8fd8f5ace6bbfc8 (diff)
downloaduri-813281d3d9140f0912dfdfd970ab5e73eedd35b8.tar.gz
Function name change: uf_urlstr(), uf_url()
Support $ENV{URL_GUESS_PATTERN}
-rw-r--r--URI/Heuristic.pm109
1 files changed, 75 insertions, 34 deletions
diff --git a/URI/Heuristic.pm b/URI/Heuristic.pm
index 87dc429..4fa4284 100644
--- a/URI/Heuristic.pm
+++ b/URI/Heuristic.pm
@@ -1,39 +1,71 @@
package URI::Heuristic;
-# $Id: Heuristic.pm,v 4.4 1997/10/14 08:35:39 aas Exp $
+# $Id: Heuristic.pm,v 4.5 1997/10/16 11:41:17 aas Exp $
=head1 NAME
-friendly_url - Expand URL using heuristics
+uf_urlstr - Expand URL using heuristics
=head1 SYNOPSIS
- use URI::Heuristic qw(friendly_url);
- $url = friendly_url("perl"); # http://www.perl.com
- $url = friendly_url("www.sol.no/sol"); # http://www.sol.no/no
- $url = friendly_url("aas"); # http://www.aas.no
- $url = friendly_url("ftp.funet.fi"); # ftp://ftp.funet.fi
- $url = friendly_url("/etc/passwd"); # file:/etc/passwd
+ use URI::Heuristic qw(uf_urlstr);
+ $url = uf_urlstr("perl"); # http://www.perl.com
+ $url = uf_urlstr("www.sol.no/sol"); # http://www.sol.no/no
+ $url = uf_urlstr("aas"); # http://www.aas.no
+ $url = uf_urlstr("ftp.funet.fi"); # ftp://ftp.funet.fi
+ $url = uf_urlstr("/etc/passwd"); # file:/etc/passwd
=head1 DESCRIPTION
This module provide functions that expand strings into real URLs using
-some heuristics. The following functions are provided:
+some (random) heuristics. Already expanded URLs are not modified and
+are returned unchanged.
+
+The following functions are provided:
=over 4
-=item friendly_url($str)
+=item uf_urlstr($str)
-The friendly_url() function will try to make the string passed as
-argument into a proper absolute URL string.
+The uf_urlstr() function will try to make the string passed as
+argument into a proper absolute URL string. The "uf_" prefix stands
+for "User Friendly".
-=item url($str)
+=item uf_url($str)
-This functions work the same way as friendly_url() but it will
+This functions work the same way as uf_urlstr() but it will
return a C<URI::URL> object.
=back
+=head1 ENVIRONMENT
+
+If the hostname portion of a URL does not contain any dots, then
+centain qualified guesses will be made. These guesses are governed be
+the following two environment variables.
+
+=over 10
+
+=item COUNTRY
+
+This is the two letter country code (ISO 3166) for your location. If
+the domain name of your host ends with two letters, then it is taken
+to be the default country. See also L<Locale::Country>.
+
+=item URL_GUESS_PATTERN
+
+Contain a space separated list of URL patterns to try. The string
+"ACME" is used as a placeholder for the host name in the URL provided.
+Example:
+
+ URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
+ export URL_GUESS_PATTERN
+
+Specifying URL_GUESS_PATTERN disables any guessing rules based on
+country. An empty URL_GUESS_PATTERN disables any guessing that
+involves host name lookups.
+
+=back
=head1 COPYRIGHT
@@ -46,42 +78,43 @@ modify it under the same terms as Perl itself.
use strict;
-use vars qw(@EXPORT_OK %LOCAL_GUESSING $DEBUG);
+use vars qw(@EXPORT_OK $MY_COUNTRY %LOCAL_GUESSING $DEBUG);
require Exporter;
*import = \&Exporter::import;
-@EXPORT_OK = qw(url friendly_url);
+@EXPORT_OK = qw(uf_url uf_urlstr);
-my $my_country;
eval {
require Net::Domain;
my $fqdn = Net::Domain::hostfqdn();
- $my_country = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
+ $MY_COUNTRY = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
# Some other heuristics to guess country? Perhaps looking
# at some environment variable (LANG, LC_ALL, ???)
- $my_country = $ENV{COUNTRY} if exists $ENV{COUNTRY};
+ $MY_COUNTRY = $ENV{COUNTRY} if exists $ENV{COUNTRY};
};
%LOCAL_GUESSING =
(
'us' => [qw(www.ACME.gov www.ACME.mil)],
- 'uk' => [qw(www.ACME.co.uk www.ACME.ac.uk)],
+ 'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
+ # send corrections and new entries to <aas@sn.no>
);
-sub url ($) # h_url(), url2(), uf_url()
+sub uf_url ($)
{
require URI::URL;
- URI::URL->new(friendly_url($_[0]));
+ URI::URL->new(uf_urlstr($_[0]));
}
-sub friendly_url ($) # expand_url(), uf_urlstr()
+sub uf_urlstr ($)
{
local($_) = @_;
+ print STDERR "uf_urlstr: resolving $_\n" if $DEBUG;
return unless defined;
s/^\s+//;
@@ -105,27 +138,35 @@ sub friendly_url ($) # expand_url(), uf_urlstr()
if ($host !~ /\./ && $host ne "localhost") {
my @guess;
-
- if ($my_country) {
- my $special = $LOCAL_GUESSING{$my_country};
- if ($special) {
- my @special = @$special;
- push(@guess, map { s/\bACME\b/$host/; $_ } @special);
- } else {
- push(@guess, "www.$host.$my_country");
+ if (exists $ENV{URL_GUESS_PATTERN}) {
+ @guess = map { s/\bACME\b/$host/; $_ }
+ split(' ', $ENV{URL_GUESS_PATTERN});
+ } else {
+ if ($MY_COUNTRY) {
+ my $special = $LOCAL_GUESSING{$MY_COUNTRY};
+ if ($special) {
+ my @special = @$special;
+ push(@guess, map { s/\bACME\b/$host/; $_ }
+ @special);
+ } else {
+ push(@guess, "www.$host.$MY_COUNTRY");
+ }
}
+ push(@guess, map "www.$host.$_",
+ "com", "org", "net", "edu", "int");
}
- push(@guess, map "www.$host.$_",
- "com", "org", "net", "edu", "int");
my $guess;
for $guess (@guess) {
- print STDERR "Looking up '$guess'\n" if $DEBUG;
+ print STDERR "uf_urlstr: gethostbyname('$guess')..."
+ if $DEBUG;
if (gethostbyname($guess)) {
+ print STDERR "yes\n" if $DEBUG;
$host = $guess;
last;
}
+ print STDERR "no\n" if $DEBUG;
}
}
$_ = "http://$host$_";