diff options
-rw-r--r-- | URI.pm | 18 | ||||
-rw-r--r-- | t/iri.t | 5 |
2 files changed, 21 insertions, 2 deletions
@@ -270,8 +270,24 @@ sub as_iri } } if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) { + # All this crap because the more obvious: + # + # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift }) + # + # doesn't work. Apparently passing a sub as CHECK only works + # for 'ascii' and similar direct encodings. + require Encode; - return Encode::decode("UTF-8", $str); + my $enc = Encode::find_encoding("UTF-8"); + my $u = ""; + while (length $str) { + $u .= $enc->decode($str, Encode::FB_QUIET()); + if (length $str) { + # escape next char + $u .= URI::Escape::escape_char(substr($str, 0, 1, "")); + } + } + $str = $u; } return $str; } @@ -2,7 +2,7 @@ use utf8; use strict; -use Test::More tests => 10; +use Test::More tests => 11; use URI; @@ -18,6 +18,9 @@ $u = URI->new("http://example.com/Bücher"); is $u, "http://example.com/B%C3%BCcher"; is $u->as_iri, "http://example.com/Bücher"; +$u = URI->new("http://example.com/B%FCcher"); # latin1 encoded stuff +is $u->as_iri, "http://example.com/B%FCcher"; # ...should not be decoded + $u = URI->new("http://➡.ws/"); is $u, "http://xn--hgi.ws/"; is $u->host, "xn--hgi.ws"; |