summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGraham Knop <haarg@haarg.org>2022-10-10 19:40:15 +0200
committerOlaf Alders <olaf@wundersolutions.com>2022-10-10 16:35:44 -0400
commit5206c1c64e54cbe4139cdf412dd7e74755be3cfe (patch)
treee0ea6bf5956488c2b36d1ea877347dadaf2899b9
parent5a628e820f57eae410fcceab949deef90887a384 (diff)
downloaduri-5206c1c64e54cbe4139cdf412dd7e74755be3cfe.tar.gz
fix uri_escape support for \w style character classes
uri_escape accepts a set of characters as its second parameter. This would have some escaping done on it before being put in an eval to generate an an escaping sub. The last release of URI attempted to do extra escaping on this character set. It tried to match the allowed forms of character classes, including a-z and [:alpha:] forms, an escaping everything else. But it didn't allow for character classes like \w. This broke several modules. The original design of the code was written for prehistoric versions of perl that didn't support compiled regexes (qr//). This is why it needed the eval and sub generation. The supported perl versions all support qr// objects, so we can compile using them rather than eval. This means much less needs to be escaped. Specifically, only the [] characters themselves. If we allow through the POSIX class forms ([:alpha:]), escaping all others, we can still be safe but allow all existing forms to be used. This can result in warnings when attempting to use escapes like \Q...\E, which are not valid character class escapes. These warnings are appropriate, so test for them. Some existing tests were expecting any backslash in the input to result in backslashes being escaped. Since we are now allowing all backslash sequences through, this is inappropriate. The tests needed to be changed.
-rw-r--r--Changes3
-rw-r--r--cpanfile2
-rw-r--r--lib/URI/Escape.pm37
-rw-r--r--t/escape.t37
4 files changed, 61 insertions, 18 deletions
diff --git a/Changes b/Changes
index 1ed0111..11f4520 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,9 @@
Revision history for URI
{{$NEXT}}
+ - fix uri_escape allowing \w style character classes in its character set
+ parameter
+
5.13 2022-10-06 16:46:32Z
- Regression test added for a previous bug (5.11) in URI::file (Perlbotics).
diff --git a/cpanfile b/cpanfile
index 0aebb4f..88c9b49 100644
--- a/cpanfile
+++ b/cpanfile
@@ -45,7 +45,9 @@ on 'runtime' => sub {
on 'test' => sub {
requires "File::Spec::Functions" => "0";
requires "File::Temp" => "0";
+ requires "Test::Fatal" => "0";
requires "Test::More" => "0.96";
requires "Test::Needs" => '0';
+ requires "Test::Warnings" => '0';
requires "utf8" => "0";
};
diff --git a/lib/URI/Escape.pm b/lib/URI/Escape.pm
index cb2db1a..c8e1e6b 100644
--- a/lib/URI/Escape.pm
+++ b/lib/URI/Escape.pm
@@ -160,24 +160,31 @@ my %Unsafe = (
sub uri_escape {
my($text, $patn) = @_;
return undef unless defined $text;
+ my $re;
if (defined $patn){
- unless (exists $subst{$patn}) {
- # Because we can't compile the regex we fake it with a cached sub
- my @parts = $patn =~ m/(
- (?: ^ \^? -? )
- | (?: .-. )
- | (?: \[:[^:]+:\] )
- | .
- )/gx;
-
- my $tmp = join '', shift @parts, map { length > 1 ? $_ : quotemeta } @parts;
- eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }";
- Carp::croak("uri_escape: $@") if $@;
+ $re = $subst{$patn};
+ if (!defined $re) {
+ $re = $patn;
+ # we need to escape the [] characters, except for those used in
+ # posix classes. if they are prefixed by a backslash, allow them
+ # through unmodified.
+ $re =~ s{(\[:\w+:\])|(\\)?([\[\]]|\\\z)}{
+ defined $1 ? $1 : defined $2 ? "$2$3" : "\\$3"
+ }ge;
+ eval {
+ # disable the warnings here, since they will trigger later
+ # when used, and we only want them to appear once per call,
+ # but every time the same pattern is used.
+ no warnings 'regexp';
+ $re = $subst{$patn} = qr{[$re]};
+ 1;
+ } or Carp::croak("uri_escape: $@");
}
- &{$subst{$patn}}($text);
- } else {
- $text =~ s/($Unsafe{RFC3986})/$escapes{$1} || _fail_hi($1)/ge;
}
+ else {
+ $re = $Unsafe{RFC3986};
+ }
+ $text =~ s/($re)/$escapes{$1} || _fail_hi($1)/ge;
$text;
}
diff --git a/t/escape.t b/t/escape.t
index d78155b..16694dd 100644
--- a/t/escape.t
+++ b/t/escape.t
@@ -2,6 +2,8 @@ use strict;
use warnings;
use Test::More;
+use Test::Warnings qw( :all );
+use Test::Fatal;
use URI::Escape qw( %escapes uri_escape uri_escape_utf8 uri_unescape );
@@ -39,19 +41,19 @@ is
is
uri_escape ('[]\\${}', '][\\${`kill -0 -1`}'),
- '%5B%5D%5C%24%7B%7D',
+ '%5B%5D\\%24%7B%7D',
'it should recognize scalar interpolation injection in unwanted characters',
;
is
uri_escape ('[]\\@{}', '][\\@{`kill -0 -1`}'),
- '%5B%5D%5C%40%7B%7D',
+ '%5B%5D\\%40%7B%7D',
'it should recognize array interpolation injection in unwanted characters',
;
is
uri_escape ('[]\\%{}', '][\\%{`kill -0 -1`}'),
- '%5B%5D%5C%25%7B%7D',
+ '%5B%5D\\%25%7B%7D',
'it should recognize hash interpolation injection in unwanted characters',
;
@@ -73,6 +75,35 @@ is
'it should recognize character groups'
;
+is
+ uri_escape ('abcd-', '\w'),
+ '%61%62%63%64-',
+ 'it should allow character class escapes'
+ ;
+
+is
+ uri_escape ('a/b`]c^', '/-^'),
+ 'a%2Fb`%5Dc%5E',
+ 'regex characters like / and ^ allowed in range'
+ ;
+
+like exception { uri_escape ('abcdef', 'd-c') },
+ qr/Invalid \[\] range "d-c" in regex/,
+ 'invalid range with max less than min throws exception';
+
+like join('', warnings {
+ is
+ uri_escape ('abcdeQE', '\Qabc\E'),
+ '%61%62%63de%51%45',
+ 'it should allow character class escapes'
+ ;
+}), qr{
+ (?-x:Unrecognized escape \\Q in character class passed through in regex)
+ .*
+ (?-x:Unrecognized escape \\E in character class passed through in regex)
+}xs,
+ 'bad escapes emit warnings';
+
is $escapes{"%"}, "%25";
is uri_escape_utf8("|abcå"), "%7Cabc%C3%A5";