summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPerlbotics <perlbotix@cpan.org>2022-06-07 11:18:57 +0200
committerOlaf Alders <olaf@wundersolutions.com>2022-07-04 16:47:40 -0400
commitf4183117b1b19fc66918a2ea0b8db0527884870e (patch)
treefed37d269ba0b623747133eaeaa76ee3848e1f2d /lib
parenta75a5f8e237be8be1e6d8c3ae5bb72c6ff4663d3 (diff)
downloaduri-f4183117b1b19fc66918a2ea0b8db0527884870e.tar.gz
uri-test: use local implementation (./lib) while authoring module
bugfix #99: Square brackets in path element not escaped. Hopefully fixes issue#99 w/o breaking new stuff. https://github.com/libwww-perl/URI/issues/99 Setting the environment variable URI_RESERVED_SQUARE_BRACKETS=1 restores the old behaviour (5.10 and before). See section ENVIRONMENT VARIABLES of the URI.pm perldoc. Tests for issue#99: Square brackets in path element not escaped. Not complete yet. See #TODO. t/old-base.t adapted to match new and legacy behavior Fixed typos in POD of URI.pm more tests, esp. getter validation userinfo(): setter escapes according to RFC 3987 authority(): setter escapes userinfo part separately from host part authority(): IPv6 detection for host part (should use Regexp::IPv6) URI.pm: POD fixed uri-test: explicit use of ./lib replaced by visual warning See: https://github.com/libwww-perl/URI/pull/100#discussion_r903497606 POD corrections and clarifications. See: https://github.com/libwww-perl/URI/pull/100#discussion_r895442979 https://github.com/libwww-perl/URI/pull/100#discussion_r895443302 https://github.com/libwww-perl/URI/pull/100#discussion_r903515373 Unused global variable removed. See: https://github.com/libwww-perl/URI/pull/100#discussion_r895978559 _generic.pm: uses Regexp::IPv6 if installed, fallback otherwise The fallback regexp is just a check for the minimum amount of legal characters. Extra check is used to ensure that at least two colons are present. See: https://github.com/libwww-perl/URI/pull/100#discussion_r903810999 Another try to get POD spehlink rait. cpanfile: Regexp::IPv6 is suggested at runtime Spelling fixed and comments updated. Thanks, @simbabque.
Diffstat (limited to 'lib')
-rw-r--r--lib/URI.pm59
-rw-r--r--lib/URI/_generic.pm31
-rw-r--r--lib/URI/_server.pm7
3 files changed, 91 insertions, 6 deletions
diff --git a/lib/URI.pm b/lib/URI.pm
index eb02a72..9370720 100644
--- a/lib/URI.pm
+++ b/lib/URI.pm
@@ -5,16 +5,21 @@ use warnings;
our $VERSION = '5.11';
+# 1=version 5.10 and earlier; 0=version 5.11 and later
+use constant HAS_RESERVED_SQUARE_BRACKETS => $ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} ? 1 : 0;
+
our ($ABS_REMOTE_LEADING_DOTS, $ABS_ALLOW_RELATIVE_SCHEME, $DEFAULT_QUERY_FORM_DELIMITER);
my %implements; # mapping from scheme to implementor class
# Some "official" character classes
-our $reserved = q(;/?:@&=+$,[]);
+our $reserved = HAS_RESERVED_SQUARE_BRACKETS ? q(;/?:@&=+$,[]) : q(;/?:@&=+$,);
our $mark = q(-_.!~*'()); #'; emacs
our $unreserved = "A-Za-z0-9\Q$mark\E";
our $uric = quotemeta($reserved) . $unreserved . "%";
+our $uric4host = $uric . ( HAS_RESERVED_SQUARE_BRACKETS ? '' : quotemeta( q([]) ) );
+our $uric4user = quotemeta( q{!$'()*,;:._~%-+=%&} ) . "A-Za-z0-9" . ( HAS_RESERVED_SQUARE_BRACKETS ? quotemeta( q([]) ) : '' ); # RFC-3987: iuserinfo w/o UTF
our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*';
@@ -86,10 +91,34 @@ sub _init
}
+#-- Version: 5.11+
+# Since the complete URI will be percent-encoded including '[' and ']',
+# we selectively unescape square brackets from the authority/host part of the URI.
+# Derived modules that implement _uric_escape() should take this into account
+# if they do not rely on URI::_uric_escape().
+# No unescaping is performed for the userinfo@ part of the authority part.
+sub _fix_uric_escape_for_host_part {
+ return if HAS_RESERVED_SQUARE_BRACKETS;
+ return if $_[0] !~ /%/;
+
+ if ($_[0] =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) {
+ my $orig = $2;
+ my ($user, $host) = $orig =~ /^(.*@)?([^@]*)$/;
+ $user ||= '';
+ my $port = $host =~ s/(:\d+)$// ? $1 : '';
+ #MAINT: die() here if scheme indicates TCP/UDP and port is out of range [0..65535] ?
+ $host =~ s/\%5B/[/gi;
+ $host =~ s/\%5D/]/gi;
+ $_[0] =~ s/\Q$orig\E/$user$host$port/;
+ }
+}
+
+
sub _uric_escape
{
my($class, $str) = @_;
$str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego;
+ _fix_uric_escape_for_host_part( $str );
utf8::downgrade($str);
return $str;
}
@@ -1087,6 +1116,34 @@ delimited by ";" instead of "&" which is the default.
=back
+=head1 ENVIRONMENT VARIABLES
+
+=over 4
+
+=item URI_HAS_RESERVED_SQUARE_BRACKETS
+
+Before version 5.11, URI treated square brackets as reserved characters
+throughout the whole URI string. However, these brackets are reserved
+only within the authority/host part of the URI and nowhere else (RFC 3986).
+
+Starting with version 5.11, URI takes this distinction into account.
+Setting the environment variable C<URI_HAS_RESERVED_SQUARE_BRACKETS>
+(programmatically or via the shell), restores the old behavior.
+
+ #-- restore 5.10 behavior programmatically
+ BEGIN {
+ $ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} = 1;
+ }
+ use URI ();
+
+I<Note>: This environment variable is just used during initialization and has to be set
+ I<before> module URI is used/required. Changing it at run time has no effect.
+
+Its value can be checked programmatically by accessing the constant
+C<URI::HAS_RESERVED_SQUARE_BRACKETS>.
+
+=back
+
=head1 BUGS
There are some things that are not quite right:
diff --git a/lib/URI/_generic.pm b/lib/URI/_generic.pm
index d03377f..8389cd6 100644
--- a/lib/URI/_generic.pm
+++ b/lib/URI/_generic.pm
@@ -10,11 +10,31 @@ use Carp ();
our $VERSION = '5.11';
-my $ACHAR = $URI::uric; $ACHAR =~ s,\\[/?],,g;
-my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g;
+my $ACHAR = URI::HAS_RESERVED_SQUARE_BRACKETS ? $URI::uric : $URI::uric4host; $ACHAR =~ s,\\[/?],,g;
+my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g;
sub _no_scheme_ok { 1 }
+our $IPv6_re;
+
+sub _looks_like_raw_ip6_address {
+ my $addr = shift;
+
+ if ( !$IPv6_re ) { #-- lazy / runs once / use Regexp::IPv6 if installed
+ eval {
+ require Regexp::IPv6;
+ Regexp::IPv6->import( qw($IPv6_re) );
+ 1;
+ } || do { $IPv6_re = qr/[:0-9a-f]{3,}/; }; #-- fallback: unambitious guess
+ }
+
+ return 0 unless $addr;
+ return 0 if $addr =~ tr/:/:/ < 2; #-- fallback must not create false positive for IPv4:Port = 0:0
+ return 1 if $addr =~ /^$IPv6_re$/i;
+ return 0;
+}
+
+
sub authority
{
my $self = shift;
@@ -26,6 +46,13 @@ sub authority
my $rest = $3;
if (defined $auth) {
$auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego;
+ if ( my ($user, $host) = $auth =~ /^(.*@)?([^@]+)$/ ) { #-- special escape userinfo part
+ $user ||= '';
+ $user =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego;
+ $user =~ s/%40$/\@/; # recover final '@'
+ $host = "[$host]" if _looks_like_raw_ip6_address( $host );
+ $auth = $user . $host;
+ }
utf8::downgrade($auth);
$$self .= "//$auth";
}
diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm
index 16ff524..69207a8 100644
--- a/lib/URI/_server.pm
+++ b/lib/URI/_server.pm
@@ -23,7 +23,8 @@ sub _uric_escape {
}
sub _host_escape {
- return unless $_[0] =~ /[^$URI::uric]/;
+ return if URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric]/;
+ return if !URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric4host]/;
eval {
require URI::_idna;
$_[0] = URI::_idna::encode($_[0]);
@@ -59,8 +60,8 @@ sub userinfo
$new =~ s/.*@//; # remove old stuff
my $ui = shift;
if (defined $ui) {
- $ui =~ s/@/%40/g; # protect @
- $new = "$ui\@$new";
+ $ui =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego;
+ $new = "$ui\@$new";
}
$self->authority($new);
}