summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Changes3
-rw-r--r--cpanfile2
-rw-r--r--lib/URI/Escape.pm37
-rw-r--r--t/escape.t37
4 files changed, 61 insertions, 18 deletions
diff --git a/Changes b/Changes
index 1ed0111..11f4520 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,9 @@
Revision history for URI
{{$NEXT}}
+ - fix uri_escape allowing \w style character classes in its character set
+ parameter
+
5.13 2022-10-06 16:46:32Z
- Regression test added for a previous bug (5.11) in URI::file (Perlbotics).
diff --git a/cpanfile b/cpanfile
index 0aebb4f..88c9b49 100644
--- a/cpanfile
+++ b/cpanfile
@@ -45,7 +45,9 @@ on 'runtime' => sub {
on 'test' => sub {
requires "File::Spec::Functions" => "0";
requires "File::Temp" => "0";
+ requires "Test::Fatal" => "0";
requires "Test::More" => "0.96";
requires "Test::Needs" => '0';
+ requires "Test::Warnings" => '0';
requires "utf8" => "0";
};
diff --git a/lib/URI/Escape.pm b/lib/URI/Escape.pm
index cb2db1a..c8e1e6b 100644
--- a/lib/URI/Escape.pm
+++ b/lib/URI/Escape.pm
@@ -160,24 +160,31 @@ my %Unsafe = (
sub uri_escape {
my($text, $patn) = @_;
return undef unless defined $text;
+ my $re;
if (defined $patn){
- unless (exists $subst{$patn}) {
- # Because we can't compile the regex we fake it with a cached sub
- my @parts = $patn =~ m/(
- (?: ^ \^? -? )
- | (?: .-. )
- | (?: \[:[^:]+:\] )
- | .
- )/gx;
-
- my $tmp = join '', shift @parts, map { length > 1 ? $_ : quotemeta } @parts;
- eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }";
- Carp::croak("uri_escape: $@") if $@;
+ $re = $subst{$patn};
+ if (!defined $re) {
+ $re = $patn;
+ # we need to escape the [] characters, except for those used in
+ # posix classes. if they are prefixed by a backslash, allow them
+ # through unmodified.
+ $re =~ s{(\[:\w+:\])|(\\)?([\[\]]|\\\z)}{
+ defined $1 ? $1 : defined $2 ? "$2$3" : "\\$3"
+ }ge;
+ eval {
+ # disable the warnings here, since they will trigger later
+ # when used, and we only want them to appear once per call,
+ # but every time the same pattern is used.
+ no warnings 'regexp';
+ $re = $subst{$patn} = qr{[$re]};
+ 1;
+ } or Carp::croak("uri_escape: $@");
}
- &{$subst{$patn}}($text);
- } else {
- $text =~ s/($Unsafe{RFC3986})/$escapes{$1} || _fail_hi($1)/ge;
}
+ else {
+ $re = $Unsafe{RFC3986};
+ }
+ $text =~ s/($re)/$escapes{$1} || _fail_hi($1)/ge;
$text;
}
diff --git a/t/escape.t b/t/escape.t
index d78155b..16694dd 100644
--- a/t/escape.t
+++ b/t/escape.t
@@ -2,6 +2,8 @@ use strict;
use warnings;
use Test::More;
+use Test::Warnings qw( :all );
+use Test::Fatal;
use URI::Escape qw( %escapes uri_escape uri_escape_utf8 uri_unescape );
@@ -39,19 +41,19 @@ is
is
uri_escape ('[]\\${}', '][\\${`kill -0 -1`}'),
- '%5B%5D%5C%24%7B%7D',
+ '%5B%5D\\%24%7B%7D',
'it should recognize scalar interpolation injection in unwanted characters',
;
is
uri_escape ('[]\\@{}', '][\\@{`kill -0 -1`}'),
- '%5B%5D%5C%40%7B%7D',
+ '%5B%5D\\%40%7B%7D',
'it should recognize array interpolation injection in unwanted characters',
;
is
uri_escape ('[]\\%{}', '][\\%{`kill -0 -1`}'),
- '%5B%5D%5C%25%7B%7D',
+ '%5B%5D\\%25%7B%7D',
'it should recognize hash interpolation injection in unwanted characters',
;
@@ -73,6 +75,35 @@ is
'it should recognize character groups'
;
+is
+ uri_escape ('abcd-', '\w'),
+ '%61%62%63%64-',
+ 'it should allow character class escapes'
+ ;
+
+is
+ uri_escape ('a/b`]c^', '/-^'),
+ 'a%2Fb`%5Dc%5E',
+ 'regex characters like / and ^ allowed in range'
+ ;
+
+like exception { uri_escape ('abcdef', 'd-c') },
+ qr/Invalid \[\] range "d-c" in regex/,
+ 'invalid range with max less than min throws exception';
+
+like join('', warnings {
+ is
+ uri_escape ('abcdeQE', '\Qabc\E'),
+ '%61%62%63de%51%45',
+ 'it should allow character class escapes'
+ ;
+}), qr{
+ (?-x:Unrecognized escape \\Q in character class passed through in regex)
+ .*
+ (?-x:Unrecognized escape \\E in character class passed through in regex)
+}xs,
+ 'bad escapes emit warnings';
+
is $escapes{"%"}, "%25";
is uri_escape_utf8("|abcå"), "%7Cabc%C3%A5";