diff options
author | Perlbotics <perlbotix@cpan.org> | 2022-06-07 11:18:57 +0200 |
---|---|---|
committer | Olaf Alders <olaf@wundersolutions.com> | 2022-07-04 16:47:40 -0400 |
commit | f4183117b1b19fc66918a2ea0b8db0527884870e (patch) | |
tree | fed37d269ba0b623747133eaeaa76ee3848e1f2d /lib | |
parent | a75a5f8e237be8be1e6d8c3ae5bb72c6ff4663d3 (diff) | |
download | uri-f4183117b1b19fc66918a2ea0b8db0527884870e.tar.gz |
uri-test: use local implementation (./lib) while authoring module
bugfix #99: Square brackets in path element not escaped.
Hopefully fixes issue#99 w/o breaking new stuff.
https://github.com/libwww-perl/URI/issues/99
Setting the environment variable URI_RESERVED_SQUARE_BRACKETS=1
restores the old behaviour (5.10 and before).
See section ENVIRONMENT VARIABLES of the URI.pm perldoc.
Tests for issue#99: Square brackets in path element not escaped.
Not complete yet. See #TODO.
t/old-base.t adapted to match new and legacy behavior
Fixed typos in POD of URI.pm
more tests, esp. getter validation
userinfo(): setter escapes according to RFC 3987
authority(): setter escapes userinfo part separately from host part
authority(): IPv6 detection for host part (should use Regexp::IPv6)
URI.pm: POD fixed
uri-test: explicit use of ./lib replaced by visual warning
See: https://github.com/libwww-perl/URI/pull/100#discussion_r903497606
POD corrections and clarifications.
See: https://github.com/libwww-perl/URI/pull/100#discussion_r895442979
https://github.com/libwww-perl/URI/pull/100#discussion_r895443302
https://github.com/libwww-perl/URI/pull/100#discussion_r903515373
Unused global variable removed.
See: https://github.com/libwww-perl/URI/pull/100#discussion_r895978559
_generic.pm: uses Regexp::IPv6 if installed, fallback otherwise
The fallback regexp is just a check for the minimum amount of
legal characters. Extra check is used to ensure that at least
two colons are present.
See: https://github.com/libwww-perl/URI/pull/100#discussion_r903810999
Another try to get POD spehlink rait.
cpanfile: Regexp::IPv6 is suggested at runtime
Spelling fixed and comments updated. Thanks, @simbabque.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/URI.pm | 59 | ||||
-rw-r--r-- | lib/URI/_generic.pm | 31 | ||||
-rw-r--r-- | lib/URI/_server.pm | 7 |
3 files changed, 91 insertions, 6 deletions
@@ -5,16 +5,21 @@ use warnings; our $VERSION = '5.11'; +# 1=version 5.10 and earlier; 0=version 5.11 and later +use constant HAS_RESERVED_SQUARE_BRACKETS => $ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} ? 1 : 0; + our ($ABS_REMOTE_LEADING_DOTS, $ABS_ALLOW_RELATIVE_SCHEME, $DEFAULT_QUERY_FORM_DELIMITER); my %implements; # mapping from scheme to implementor class # Some "official" character classes -our $reserved = q(;/?:@&=+$,[]); +our $reserved = HAS_RESERVED_SQUARE_BRACKETS ? q(;/?:@&=+$,[]) : q(;/?:@&=+$,); our $mark = q(-_.!~*'()); #'; emacs our $unreserved = "A-Za-z0-9\Q$mark\E"; our $uric = quotemeta($reserved) . $unreserved . "%"; +our $uric4host = $uric . ( HAS_RESERVED_SQUARE_BRACKETS ? '' : quotemeta( q([]) ) ); +our $uric4user = quotemeta( q{!$'()*,;:._~%-+=%&} ) . "A-Za-z0-9" . ( HAS_RESERVED_SQUARE_BRACKETS ? quotemeta( q([]) ) : '' ); # RFC-3987: iuserinfo w/o UTF our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*'; @@ -86,10 +91,34 @@ sub _init } +#-- Version: 5.11+ +# Since the complete URI will be percent-encoded including '[' and ']', +# we selectively unescape square brackets from the authority/host part of the URI. +# Derived modules that implement _uric_escape() should take this into account +# if they do not rely on URI::_uric_escape(). +# No unescaping is performed for the userinfo@ part of the authority part. +sub _fix_uric_escape_for_host_part { + return if HAS_RESERVED_SQUARE_BRACKETS; + return if $_[0] !~ /%/; + + if ($_[0] =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { + my $orig = $2; + my ($user, $host) = $orig =~ /^(.*@)?([^@]*)$/; + $user ||= ''; + my $port = $host =~ s/(:\d+)$// ? $1 : ''; + #MAINT: die() here if scheme indicates TCP/UDP and port is out of range [0..65535] ? + $host =~ s/\%5B/[/gi; + $host =~ s/\%5D/]/gi; + $_[0] =~ s/\Q$orig\E/$user$host$port/; + } +} + + sub _uric_escape { my($class, $str) = @_; $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego; + _fix_uric_escape_for_host_part( $str ); utf8::downgrade($str); return $str; } @@ -1087,6 +1116,34 @@ delimited by ";" instead of "&" which is the default. =back +=head1 ENVIRONMENT VARIABLES + +=over 4 + +=item URI_HAS_RESERVED_SQUARE_BRACKETS + +Before version 5.11, URI treated square brackets as reserved characters +throughout the whole URI string. However, these brackets are reserved +only within the authority/host part of the URI and nowhere else (RFC 3986). + +Starting with version 5.11, URI takes this distinction into account. +Setting the environment variable C<URI_HAS_RESERVED_SQUARE_BRACKETS> +(programmatically or via the shell), restores the old behavior. + + #-- restore 5.10 behavior programmatically + BEGIN { + $ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} = 1; + } + use URI (); + +I<Note>: This environment variable is just used during initialization and has to be set + I<before> module URI is used/required. Changing it at run time has no effect. + +Its value can be checked programmatically by accessing the constant +C<URI::HAS_RESERVED_SQUARE_BRACKETS>. + +=back + =head1 BUGS There are some things that are not quite right: diff --git a/lib/URI/_generic.pm b/lib/URI/_generic.pm index d03377f..8389cd6 100644 --- a/lib/URI/_generic.pm +++ b/lib/URI/_generic.pm @@ -10,11 +10,31 @@ use Carp (); our $VERSION = '5.11'; -my $ACHAR = $URI::uric; $ACHAR =~ s,\\[/?],,g; -my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g; +my $ACHAR = URI::HAS_RESERVED_SQUARE_BRACKETS ? $URI::uric : $URI::uric4host; $ACHAR =~ s,\\[/?],,g; +my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g; sub _no_scheme_ok { 1 } +our $IPv6_re; + +sub _looks_like_raw_ip6_address { + my $addr = shift; + + if ( !$IPv6_re ) { #-- lazy / runs once / use Regexp::IPv6 if installed + eval { + require Regexp::IPv6; + Regexp::IPv6->import( qw($IPv6_re) ); + 1; + } || do { $IPv6_re = qr/[:0-9a-f]{3,}/; }; #-- fallback: unambitious guess + } + + return 0 unless $addr; + return 0 if $addr =~ tr/:/:/ < 2; #-- fallback must not create false positive for IPv4:Port = 0:0 + return 1 if $addr =~ /^$IPv6_re$/i; + return 0; +} + + sub authority { my $self = shift; @@ -26,6 +46,13 @@ sub authority my $rest = $3; if (defined $auth) { $auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego; + if ( my ($user, $host) = $auth =~ /^(.*@)?([^@]+)$/ ) { #-- special escape userinfo part + $user ||= ''; + $user =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego; + $user =~ s/%40$/\@/; # recover final '@' + $host = "[$host]" if _looks_like_raw_ip6_address( $host ); + $auth = $user . $host; + } utf8::downgrade($auth); $$self .= "//$auth"; } diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm index 16ff524..69207a8 100644 --- a/lib/URI/_server.pm +++ b/lib/URI/_server.pm @@ -23,7 +23,8 @@ sub _uric_escape { } sub _host_escape { - return unless $_[0] =~ /[^$URI::uric]/; + return if URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric]/; + return if !URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric4host]/; eval { require URI::_idna; $_[0] = URI::_idna::encode($_[0]); @@ -59,8 +60,8 @@ sub userinfo $new =~ s/.*@//; # remove old stuff my $ui = shift; if (defined $ui) { - $ui =~ s/@/%40/g; # protect @ - $new = "$ui\@$new"; + $ui =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego; + $new = "$ui\@$new"; } $self->authority($new); } |